# --------------------------------------------------------------------------------
# PREPARATIONS ----
# --------------------------------------------------------------------------------
# Author Info ----
# Florian Wintterlin
# July 31, 2023
# License:  License: CC-By Attribution 4.0 International

# Session Info ----

# R version 4.4.2 (2024-10-31)
# Platform: x86_64-apple-darwin20
# Running under: macOS Ventura 13.7.1


# Load libraries ----
pacman::p_unload(pacman::p_loaded(), character.only = TRUE)
set.seed(2000)

library(tidyverse) 
library(dplyr) 
library(magrittr) 
library(haven) 
library(descr) 
library(tidycomm) 
library(htmlTable) 
library(sjmisc) 
library(sjlabelled) 
library(psych) 
library(stats) 
library(report) 
library(naniar) 
library(correlation) 
library(xtable)
library(Hmisc) 
library(MVN)
library(lavaan)
library(writexl)
library(GPArotation) 
library(stargazer)
library(MissMech) 
library(finalfit) 
library(careless)
library(labelled)
library(skimr)
library(broom)
library(blorr)
library(splines)
library(arm)
library(sjPlot)
library(ggplot2)
library(ggridges)
library(RColorBrewer)
library(DescTools)
library(viridis)
library(patchwork)
library(multcomp)
library(car)
library(gridExtra)
library(lmtest)
library(olsrr)
library(sandwich)
library(MASS)
library(boot)
library(kableExtra)
library(cowplot)

# --------------------------------------------------------------------------------
# DATA WRANGLING  ----
# --------------------------------------------------------------------------------
## Build datasets ----
### Import raw data ----
ds_MisInfoCT <- readRDS("published/raw_data_CTvsMisinfo.rds")
str(ds_MisInfoCT)


### Exclude dropouts ----
# explore where people left the survey
ds_MisInfoCT %>% is.na() %>% colSums() 

# filter out all that did not finish the survey
ds_MisInfoCT_filtered_finished <- ds_MisInfoCT %>% filter(LASTPAGE == 18)

### Fix NAs for all variables ----
ds_MisInfoCT_filtered_finished[ds_MisInfoCT_filtered_finished == 99 | ds_MisInfoCT_filtered_finished == -99| ds_MisInfoCT_filtered_finished == -9] <- NA 
# Define missing value for -1 except for columns starting with C101, C102, and C103
columns_to_exclude <- c("C101", "C102", "C103")
columns_to_exclude_pattern <- paste(columns_to_exclude, collapse = "|")
ds_MisInfoCT_filtered_finished[!grepl(columns_to_exclude_pattern, names(ds_MisInfoCT_filtered_finished))] <- 
  lapply(ds_MisInfoCT_filtered_finished[!grepl(columns_to_exclude_pattern, names(ds_MisInfoCT_filtered_finished))], 
         function(x) { x[x == -1] <- NA; return(x) })

# Define missing values for Political Orientation
freq(ds_MisInfoCT$CA05)
freq(ds_MisInfoCT_filtered_finished$CA05)
ds_MisInfoCT_filtered_finished$CA05[ds_MisInfoCT_filtered_finished$CA05 %in% c("[NA] weiß nicht [-1]", "[NA] nicht beantwortet")] <- NA
freq(ds_MisInfoCT_filtered_finished$CA05)

### Select relevant items for analysis ----
ds_MisInfoCT_datamanag <- ds_MisInfoCT_filtered_finished %>%
  dplyr::select(id = REF, # ID
                gender = CA02, # Gender
                age = CA03_01, # Age
                Education = CA04, # Education
                polorientation = CA05, # Political orientation
                TrustMedia_1 = CA08_01, # Trust in Media - Public Radio
                TrustMedia_2 = CA08_02, # Trust in Media - Private Radio
                TrustMedia_3 = CA08_03, # Trust in Media - Newspapers
                TrustMedia_4 = CA08_04, # Trust in Media - TV
                TrustMedia_5 = CA08_05, # Trust in Media - Other News sites
                TrustMedia_6 = CA08_06, # Trust in Media - Blogs
                TrustMedia_7 = CA08_07, # Trust in Media - Social Media
                TrustMedia_8 = CA08_08, # Trust in Media - Alternative Media
                TrustMedia_9 = CA08_09, # Trust in Media - Video platforms
                TrustMedia_10 = CA08_10, # Trust in Media - Messenger
                TrustPolitical_1 = CA10_01, # Trust in Politics - Bundestag
                TrustPolitical_2 = CA10_02, # Trust in Politics - Justice
                TrustPolitical_3 = CA10_03, # Trust in Politics - Police
                TrustPolitical_4 = CA10_04, # Trust in Politics - Political Parties
                TrustPolitical_5 = CA10_05, # Trust in Politics - Politicians
                TrustScience_cat = TS01, # Trust in Science
                religiousity = CA09_01, # Religiosity
                Consp_systemic_scientific_COVID_5G = C101_16, # Conspiracy - systemic - COVID 5G COVID
                Consp_systemic_scientific_COVID_tracking = C101_17, # Conspiracy - systemic - COVID Peilsender
                Consp_systemic_scientific_COVID_Gates = C101_18, # Conspiracy - systemic - COVID Gates
                Consp_systemic_scientific_COVID_rights = C101_19, # Conspiracy - systemic - COVID Grundrechte
                Consp_systemic_scientific_COVID_Biow = C101_20, # Conspiracy - systemic - COVID Biowaffe
                Misinfo_COVID_Mask = C102_01, # Misinfo - COVID - Maske
                Misinfo_COVID_RNA = C102_02, # Misinfo - COVID - RNA-Impfstoff
                Misinfo_COVID_Flue = C102_03, # Misinfo - COVID - Grippe
                Misinfo_COVID_Coke = C102_04, # Misinfo - COVID - Glas Cola
                Misinfo_COVID_Misca = C102_05, # Misinfo - COVID - Fehlgeburten
                ConspMentality_1 = CM01_01, # Conspiracy Mentality 1
                ConspMentality_2 = CM01_02, # Conspiracy Mentality 2
                ConspMentality_3 = CM01_03, # Conspiracy Mentality 3
                ConspMentality_4 = CM01_04, # Conspiracy Mentality 4
                ConspMentality_5 = CM01_05, # Conspiracy Mentality 5
                EpistBel_Truth_1 = EB01_01, # Epistemic Beliefs - Truth is political 1
                EpistBel_Truth_2 = EB01_02, # Epistemic Beliefs - Truth is political 2
                EpistBel_Truth_3 = EB01_03, # Epistemic Beliefs - Truth is political 3
                EpistBel_Truth_4 = EB01_04, # Epistemic Beliefs - Truth is political 4
                EpistBel_IT_1 = EB01_05, # Epistemic Beliefs - Intuitive Thinking 1
                EpistBel_IT_2 = EB01_06, # Epistemic Beliefs - Intuitive Thinking 2
                EpistBel_IT_3 = EB01_07, # Epistemic Beliefs - Intuitive Thinking 3
                EpistBel_IT_4 = EB01_08, # Epistemic Beliefs - Intuitive Thinking 4
                EpistBel_IT_5 = EB01_09, # Epistemic Beliefs - Intuitive Thinking 5
                EpistBel_IT_6 = EB01_10, # Epistemic Beliefs - Intuitive Thinking 6
                EpistBel_AOT_1 = EB01_11, # Epistemic Beliefs - Actively Open-Minded Thinking 1
                EpistBel_AOT_2 = EB01_12, # Epistemic Beliefs - Actively Open-Minded Thinking 2
                EpistBel_AOT_3 = EB01_13, # Epistemic Beliefs - Actively Open-Minded Thinking 3
                EpistBel_AOT_4 = EB01_14, # Epistemic Beliefs - Actively Open-Minded Thinking 4
                EpistBel_AOT_5 = EB01_15, # Epistemic Beliefs - Actively Open-Minded Thinking 5
                EpistBel_AOT_6 = EB01_16, # Epistemic Beliefs - Actively Open-Minded Thinking 6
                MediaUse_TV = MU01_01, # Media Use TV
                MediaUse_NewsTV = MU01_02, # Media Use TV news
                MediaUse_Radio = MU01_03, # Media Use Radio
                MediaUse_Papers = MU01_04, # Media Use Newspapers
                MediaUse_Magaz = MU01_05, # Media Use Magazines
                MediaUse_WebNewsp = MU01_06, # Media Use Websites Newspapers
                MediaUse_WebMag = MU01_07, # Media Use Websites Magazines
                MediaUse_WebBroadcast = MU01_08, # Media Use Websites TV/Radio
                MediaUse_WebOther = MU01_09, # Media Use Websites other
                MediaUse_SoMe = MU01_10, # Media Use Social Media
                MediaUse_Messenger = MU01_11, # Media Use Messenger
                SDO1 = SD01_01, # Social Dominance 1
                SDO2 = SD01_02, # Social Dominance 2
                SDO3 = SD01_03, # Social Dominance 3
                SDO4 = SD01_04, # Social Dominance 4
                SoMe_FB = SM01_01, # Social Media Use - Facebook
                SoMe_FBMess = SM01_02, # Social Media Use - Facebook Messenger
                SoMe_WA = SM01_03, # Social Media Use - WhatsApp
                SoMe_Tw = SM01_04, # Social Media Use - Twitter
                SoMe_YT = SM01_05, # Social Media Use - YouTube
                SoMe_Insta = SM01_06, # Social Media Use - Instagram
                SoMe_Snap = SM01_07, # Social Media Use - Snapchat
                SoMe_TikTok = SM01_08, # Social Media Use - TikTok
                SoMe_LIn = SM01_09, # Social Media Use - LinkedIn or Xing
                SoMe_Telegram = SM01_10, # Social Media Use - Telegram
                SoMe_Reddit = SM01_11, # Social Media Use - Reddit
                AltMed_RT = AM01_01, # Alternative Media Use - RT deutsch
                AltMed_ET = AM01_02, # Alternative Media Use - Epoch Times
                AltMed_PI = AM01_03, # Alternative Media Use - PI News
                AltMed_Sput = AM01_04, # Alternative Media Use - Sputnik
                AltMed_Comp = AM01_05, # Alternative Media Use - Compact
                AltMed_JF = AM01_06, # Alternative Media Use - Junge Freiheit
                AltMed_DWN = AM01_07, # Alternative Media Use - Deutsche Wirtschaftsnachrichten
                AltMed_Tichy = AM01_08, # Alternative Media Use - Tichys Einblick
                AltMed_JourW = AM01_09, # Alternative Media Use - Journalistenwatch
                AltMed_Achgut = AM01_10, # Alternative Media Use - Achgut
                AltMed_Ken = AM01_11, # Alternative Media Use - KenFM
                AltMed_Rubi = AM01_12, # Alternative Media Use - Rubikon
                AltMed_JW = AM01_13, # Alternative Media Use - Junge Welt
                AltMed_Nachdenk = AM01_14, # Alternative Media Use - Nachchdenkseiten
                AltMed_Indymed = AM01_15, # Alternative Media Use - Indymedia
                AltMed_Kontrast = AM01_16, # Alternative Media Use - Kontrast
                AltMed_Messaging = AM01_17, # Alternative Media Use - Messaging Apps
                AltMed_Other = AM01_18 # Alternative Media Use - Other
  ) 


### Labeling variables ----
ds_MisInfoCT_datamanag %<>%
  var_labels(
    id = "ID",
    gender = "Gender",
    age = "Age",
    Education = "Education",
    polorientation = "Political Orientation",
    TrustMedia_1 = "Trust in Media - Public Radio",
    TrustMedia_2 = "Trust in Media - Private Radio",
    TrustMedia_3 = "Trust in Media - Newspapers",
    TrustMedia_4 = "Trust in Media - TV",
    TrustMedia_5 = "Trust in Media - Other News sites",
    TrustMedia_6 = "Trust in Media - Blogs",
    TrustMedia_7 = "Trust in Media - Social Media",
    TrustMedia_8 = "Trust in Media - Alternative Media",
    TrustMedia_9 = "Trust in Media - Video platforms",
    TrustMedia_10 = "Trust in Media - Messenger",
    TrustPolitical_1 = "Trust in Politics - Bundestag",
    TrustPolitical_2 = "Trust in Politics - Justice",
    TrustPolitical_3 = "Trust in Politics - Police",
    TrustPolitical_4 = "Trust in Politics - Political Parties",
    TrustPolitical_5 = "Trust in Politics - Politicians",
    TrustScience_cat = "Trust in Science",
    religiousity = "Religiousity",
    Consp_systemic_scientific_COVID_5G = "Conspiracy - Systemic - COVID 5G COVID",
    Consp_systemic_scientific_COVID_tracking = "Conspiracy - Systemic - COVID Peilsender",
    Consp_systemic_scientific_COVID_Gates = "Conspiracy - Systemic - COVID Gates",
    Consp_systemic_scientific_COVID_rights = "Conspiracy - Systemic - COVID Grundrechte",
    Consp_systemic_scientific_COVID_Biow = "Conspiracy - Systemic - COVID Biowaffe",
    Misinfo_COVID_Mask = "Misinfo - COVID - Maske",
    Misinfo_COVID_RNA = "Misinfo - COVID - RNA-Impfstoff",
    Misinfo_COVID_Flue = "Misinfo - COVID - Grippe",
    Misinfo_COVID_Coke = "Misinfo - COVID - Glas Cola",
    Misinfo_COVID_Misca = "Misinfo - COVID - Fehlgeburten",
    ConspMentality_1 = "Conspiracy Mentality 1",
    ConspMentality_2 = "Conspiracy Mentality 2",
    ConspMentality_3 = "Conspiracy Mentality 3",
    ConspMentality_4 = "Conspiracy Mentality 4",
    ConspMentality_5 = "Conspiracy Mentality 5",
    EpistBel_Truth_1 = "Epistemic Beliefs - Truth is political 1",
    EpistBel_Truth_2 = "Epistemic Beliefs - Truth is political 2",
    EpistBel_Truth_3 = "Epistemic Beliefs - Truth is political 3",
    EpistBel_Truth_4 = "Epistemic Beliefs - Truth is political 4",
    EpistBel_IT_1 = "Epistemic Beliefs - Intuitive Thinking 1",
    EpistBel_IT_2 = "Epistemic Beliefs - Intuitive Thinking 2",
    EpistBel_IT_3 = "Epistemic Beliefs - Intuitive Thinking 3",
    EpistBel_IT_4 = "Epistemic Beliefs - Intuitive Thinking 4",
    EpistBel_IT_5 = "Epistemic Beliefs - Intuitive Thinking 5",
    EpistBel_IT_6 = "Epistemic Beliefs - Intuitive Thinking 6",
    EpistBel_AOT_1 = "Epistemic Beliefs - Actively Open-Minded Thinking 1",
    EpistBel_AOT_2 = "Epistemic Beliefs - Actively Open-Minded Thinking 2",
    EpistBel_AOT_3 = "Epistemic Beliefs - Actively Open-Minded Thinking 3",
    EpistBel_AOT_4 = "Epistemic Beliefs - Actively Open-Minded Thinking 4",
    EpistBel_AOT_5 = "Epistemic Beliefs - Actively Open-Minded Thinking 5",
    EpistBel_AOT_6 = "Epistemic Beliefs - Actively Open-Minded Thinking 6",
    MediaUse_TV = "Media Use TV",
    MediaUse_NewsTV = "Media Use TV news",
    MediaUse_Radio = "Media Use Radio",
    MediaUse_Papers = "Media Use Newspapers",
    MediaUse_Magaz = "Media Use Magazines",
    MediaUse_WebNewsp = "Media Use Websites Newspapers",
    MediaUse_WebMag = "Media Use Websites Magazines",
    MediaUse_WebBroadcast = "Media Use Websites TV/Radio",
    MediaUse_WebOther = "Media Use Websites other",
    MediaUse_SoMe = "Media Use Social Media",
    MediaUse_Messenger = "Media Use Messenger",
    SDO1 = "Social Dominance 1",
    SDO2 = "Social Dominance 2",
    SDO3 = "Social Dominance 3",
    SDO4 = "Social Dominance 4",
    SoMe_FB = "Social Media Use - Facebook",
    SoMe_FBMess = "Social Media Use - Facebook Messenger",
    SoMe_WA = "Social Media Use - WhatsApp",
    SoMe_Tw = "Social Media Use - Twitter",
    SoMe_YT = "Social Media Use - YouTube",
    SoMe_Insta = "Social Media Use - Instagram",
    SoMe_Snap = "Social Media Use - Snapchat",
    SoMe_TikTok = "Social Media Use - TikTok",
    SoMe_LIn = "Social Media Use - LinkedIn or Xing",
    SoMe_Telegram = "Social Media Use - Telegram",
    SoMe_Reddit = "Social Media Use - Reddit",
    AltMed_RT = "Alternative Media Use - RT deutsch",
    AltMed_ET = "Alternative Media Use - Epoch Times",
    AltMed_PI = "Alternative Media Use - PI News",
    AltMed_Sput = "Alternative Media Use - Sputnik",
    AltMed_Comp = "Alternative Media Use - Compact",
    AltMed_JF = "Alternative Media Use - Junge Freiheit",
    AltMed_DWN = "Alternative Media Use - Deutsche Wirtschaftsnachrichten",
    AltMed_Tichy = "Alternative Media Use - Tichys Einblick",
    AltMed_JourW = "Alternative Media Use - Journalistenwatch",
    AltMed_Achgut = "Alternative Media Use - Achgut",
    AltMed_Ken = "Alternative Media Use - KenFM",
    AltMed_Rubi = "Alternative Media Use - Rubikon",
    AltMed_JW = "Alternative Media Use - Junge Welt",
    AltMed_Nachdenk = "Alternative Media Use - Nachchdenkseiten",
    AltMed_Indymed = "Alternative Media Use - Indymedia",
    AltMed_Kontrast = "Alternative Media Use - Kontrast",
    AltMed_Messaging = "Alternative Media Use - Messaging Apps",
    AltMed_Other = "Alternative Media Use - Other"
  )


### Print list of variables ----
variablelist<-data.frame(Variable = attr(ds_MisInfoCT_datamanag, "names"),
                         Label = as.vector(get_label(ds_MisInfoCT_datamanag))) %>% htmlTable(align = "l")

variablelist

### Adjust data types ----
# Structure of the dataset and variable types
str(ds_MisInfoCT_datamanag)

# Datatypes
ds_MisInfoCT_datamanag <- ds_MisInfoCT_datamanag %>%
  mutate(across(
    .cols = c(age, polorientation),  # Exclude education and gender columns
    .fns = as.numeric  # Convert selected columns to numeric
  ))

str(ds_MisInfoCT_datamanag)

# --------------------------------------------------------------------------------
# DATA CLEANING ----
# --------------------------------------------------------------------------------
## exclude cases with high rates of missing values ----
# compute percentage of missing values and add the value to a new variable
ds_MisInfoCT_datamanag <- ds_MisInfoCT_datamanag %>%
  mutate(percentage_missing = rowMeans(is.na(.)) * 100)
summary(ds_MisInfoCT_datamanag$percentage_missing)
descr::freq(ds_MisInfoCT_datamanag$percentage_missing)

# filter cases with more than 30% missing values
missings<-ds_MisInfoCT_datamanag %>%
  filter(percentage_missing > 30)

# exclude cases with more than 30% missing values (n = 24)
ds_MisInfoCT_datamanag <- ds_MisInfoCT_datamanag %>%
  filter(percentage_missing <= 30)

## exclude cases with straightlining ----
# select only items with scales where straightlining is problematic 
ds_MisInfoCT_filtered_irv <- ds_MisInfoCT_datamanag %>%
  dplyr::select(starts_with(c("EpistBel_Truth", "EpistBel_IT", 
                              "EpistBel_AOT", "SDO", "AMScepticism", "IMScepticism",
                              "SoMe")))

# compute irv-values and add them to a new variable
irv <- careless::irv(ds_MisInfoCT_filtered_irv,na.rm = TRUE)
irv
ds_MisInfoCT_filtered_irv$irv <- careless::irv(ds_MisInfoCT_filtered_irv,na.rm = TRUE)
summary(ds_MisInfoCT_filtered_irv$irv)

# show percentage of cases with values under 1.26 following Hong et al., 2020
mean(ds_MisInfoCT_filtered_irv$irv < 1.26) * 100 # result: 1,41%

# write irv-value in original dataset
ds_MisInfoCT_datamanag$irv <- careless::irv(ds_MisInfoCT_filtered_irv,na.rm = TRUE)

# filter cases with with values under 1,26 following Hong et al., 2020
missings<-ds_MisInfoCT_datamanag %>%
  filter(irv <= 1.26)

# exclude cases with values under 1,26 following Hong et al., 2020 (n = 28)
ds_MisInfoCT_datamanag <- ds_MisInfoCT_datamanag %>%
  filter(irv >= 1.26)


# --------------------------------------------------------------------------------
# RECODING OF ITEMS ---- 
# --------------------------------------------------------------------------------
## Recode reverse coded items for SDO1 and SDO3 ----
# Replace -1 with NA
ds_MisInfoCT_datamanag$SDO1[ds_MisInfoCT_datamanag$SDO1 == -1] <- NA

# Reverse code SD01 and create a new variable SD01_r
ds_MisInfoCT_datamanag$SDO1_r <- 12 - ds_MisInfoCT_datamanag$SDO1

# Compare frequencies of new and old variable
freq(ds_MisInfoCT_datamanag$SDO1)
freq(ds_MisInfoCT_datamanag$SDO1_r)

# Replace -1 with NA in SD03
ds_MisInfoCT_datamanag$SDO3[ds_MisInfoCT_datamanag$SDO3 == -1] <- NA

# Reverse code SD03 and create a new variable SD03_r
ds_MisInfoCT_datamanag$SDO3_r <- 12 - ds_MisInfoCT_datamanag$SDO3

# Compare frequencies of new and old variable
freq(ds_MisInfoCT_datamanag$SDO3)
freq(ds_MisInfoCT_datamanag$SDO3_r)


## Dummy coding CT and Misinfo items ----
# so that 1 means misperception and 0 correct perception  
# and dummy-coding of Trueinfo-Item (not reverse coded) so that 1 means correct perception and 0 misperception

# Define dummy-function, 1 means agreement or misinfo belief
dummy_code <- function(data, variables) {
  for (variable in variables) {
    data <- mutate(data, !!paste0(variable, "_dummy") := ifelse(!!sym(variable) == 1, 1, 0))
  }
  return(data)
}

# Select variables
selected_variables <- c(
  "Consp_systemic_scientific_COVID_5G", "Consp_systemic_scientific_COVID_tracking",
  "Consp_systemic_scientific_COVID_Gates", "Consp_systemic_scientific_COVID_rights", "Consp_systemic_scientific_COVID_Biow",
  "Misinfo_COVID_Mask" , "Misinfo_COVID_RNA" , "Misinfo_COVID_Flue" , "Misinfo_COVID_Coke" , "Misinfo_COVID_Misca" 
)

# Dummy code
ds_MisInfoCT_datamanag <- dummy_code(ds_MisInfoCT_datamanag, selected_variables)

# Display descriptives and check 

freq(ds_MisInfoCT_datamanag$Misinfo_COVID_Mask)
freq(ds_MisInfoCT_datamanag$Misinfo_COVID_Mask_dummy)


ds_MisInfoCT_datamanag %<>% var_labels(Consp_systemic_scientific_COVID_5G_dummy = "Conspiracy - systemic - COVID I - 5G dummy",
                             Consp_systemic_scientific_COVID_tracking_dummy = "Conspiracy - systemic - COVID II - tracking dummy", 
                             Consp_systemic_scientific_COVID_Gates_dummy = "Conspiracy - systemic - COVID III - Bill Gates dummy",
                             Consp_systemic_scientific_COVID_rights_dummy = "Conspiracy - systemic - COVID IV - rights restriction dummy",
                             Consp_systemic_scientific_COVID_Biow_dummy = "Conspiracy - systemic - COVID V - bio weapon dummy",
                             Misinfo_COVID_Mask_dummy = "Misinfo - COVID - Mask dummy",
                             Misinfo_COVID_RNA_dummy = "Misinfo - COVID - RNA dummy",
                             Misinfo_COVID_Flue_dummy = "Misinfo - COVID - Flue dummy",
                             Misinfo_COVID_Coke_dummy = "Misinfo - COVID - Testing Coke dummy",
                             Misinfo_COVID_Misca_dummy = "Misinfo - COVID - Miscarriage dummy",
)

## Recode media use ----
ds_MisInfoCT_datamanag %>% dplyr::select (MediaUse_TV, MediaUse_NewsTV)  %>% head()

ds_MisInfoCT_datamanag <- ds_MisInfoCT_datamanag %>%
  mutate(across(starts_with("MediaUse"), 
                ~ case_when(. == TRUE ~ 1,
                            . == FALSE ~ 0,
                            . == -1 ~ NA_real_,
                            TRUE ~ NA_real_),
                .names = "r_{col}"))

# check results
ds_MisInfoCT_datamanag %>% dplyr::select (MediaUse_TV, MediaUse_NewsTV)  %>% head()
freq(ds_MisInfoCT_datamanag$MediaUse_TV)
freq(ds_MisInfoCT_datamanag$r_MediaUse_TV)


## Recode alternative media use ----
ds_MisInfoCT_datamanag %>% dplyr::select (AltMed_RT, AltMed_ET)  %>% head()

ds_MisInfoCT_datamanag <- ds_MisInfoCT_datamanag %>%
  mutate(across(starts_with("AltMed"), 
                ~ case_when(. == TRUE ~ 1,
                            . == FALSE ~ 0,
                            . == -1 ~ NA_real_,
                            TRUE ~ NA_real_),
                .names = "r_{col}"))

# check results
ds_MisInfoCT_datamanag %>% dplyr::select (AltMed_RT, r_AltMed_RT)  %>% head()
freq(ds_MisInfoCT_datamanag$AltMed_RT)
freq(ds_MisInfoCT_datamanag$r_AltMed_RT)

## Recode trust in science ----
ds_MisInfoCT_datamanag %>% dplyr::select (TrustScience_cat)  %>% head()
freq(ds_MisInfoCT_datamanag$TrustScience_cat)

ds_MisInfoCT_datamanag <- ds_MisInfoCT_datamanag %>% mutate(TrustScience = 
                                          case_when(TrustScience_cat == "Gar kein Vertrauen [1]" ~ 1,
                                                    TrustScience_cat == "[2]" ~ 2,
                                                    TrustScience_cat == "[3]" ~ 3,
                                                    TrustScience_cat == "[4]" ~ 4,
                                                    TrustScience_cat == "[5]" ~ 5,
                                                    TrustScience_cat == "[6]" ~ 6,
                                                    TrustScience_cat == "Sehr großes Vertrauen [7]" ~ 7,
                                                    TrustScience_cat == "[NA] weiß nicht [-1]" ~ NA,
                                                    TrustScience_cat == "[NA] nicht beantwortet" ~ NA
                                          ))
freq(ds_MisInfoCT_datamanag$TrustScience)


#END DATA WRANGLING  
# --------------------------------------------------------------------------------
# INVESTIGATE FACTOR STRUCTURE ----
# --------------------------------------------------------------------------------
## PCA CTBs COVID ----
### All items ----
voi <- ds_MisInfoCT_datamanag%>%
  dplyr::select(Consp_systemic_scientific_COVID_5G_dummy , Consp_systemic_scientific_COVID_tracking_dummy , 
                Consp_systemic_scientific_COVID_Gates_dummy , Consp_systemic_scientific_COVID_rights_dummy ,
                Consp_systemic_scientific_COVID_Biow_dummy
  )

#Step 1: Inspection of items
raqMatrix<-cor(voi) #correlation matrix
bartlett.test(voi) #bartlett test (significant)
kmo <- function(x)
{
  x <- subset(x, complete.cases(x)) # Omit missing values
  r <- cor(x) # Correlation matrix
  r2 <- r^2 # Squared correlation coefficients
  i <- solve(r) # Inverse matrix of correlation matrix
  d <- diag(i) # Diagonal elements of inverse matrix
  p2 <- (-i/sqrt(outer(d, d)))^2 # Squared partial correlation coefficients
  diag(r2) <- diag(p2) <- 0 # Delete diagonal elements
  KMO <- sum(r2)/(sum(r2)+sum(p2))
  MSA <- colSums(r2)/(colSums(r2)+colSums(p2))
  return(list(KMO=KMO, MSA=MSA))
}
kmo(voi) #MSA for all items good?

#Step 2: Number of factors
items.parallel <- fa.parallel(voi, fa="pc") # 1 factor

pc1 <- principal(voi, nfactors = 5, rotate = "none")
plot(pc1$values, type = "b")
pc1 # 1 factor

# Step 3: PCA with 1 factor

pc2<- principal(voi, nfactors = 1, rotate = "none")
pc2 #inspecting communalities (h2): 

pc2 <- principal(voi, nfactors = 1, rotate = "oblimin")
print.psych(pc2, cut = 0.3, sort = TRUE)


## PCA Misinfo COVID ----
### All items ----
voi <- ds_MisInfoCT_datamanag%>%
  dplyr::select(Misinfo_COVID_Mask_dummy , Misinfo_COVID_RNA_dummy , 
                Misinfo_COVID_Flue_dummy , Misinfo_COVID_Coke_dummy ,
                Misinfo_COVID_Misca_dummy
                )

#Step 1: Inspection of items
raqMatrix<-cor(voi) #correlation matrix
bartlett.test(voi) #bartlett test (significant)
kmo <- function(x)
{
  x <- subset(x, complete.cases(x)) # Omit missing values
  r <- cor(x) # Correlation matrix
  r2 <- r^2 # Squared correlation coefficients
  i <- solve(r) # Inverse matrix of correlation matrix
  d <- diag(i) # Diagonal elements of inverse matrix
  p2 <- (-i/sqrt(outer(d, d)))^2 # Squared partial correlation coefficients
  diag(r2) <- diag(p2) <- 0 # Delete diagonal elements
  KMO <- sum(r2)/(sum(r2)+sum(p2))
  MSA <- colSums(r2)/(colSums(r2)+colSums(p2))
  return(list(KMO=KMO, MSA=MSA))
}
kmo(voi) #MSA for all items good?

#Step 2: Number of factors
items.parallel <- fa.parallel(voi, fa="pc") # 1 factor

pc1 <- principal(voi, nfactors = 5, rotate = "none")
plot(pc1$values, type = "b")
pc1 # 1 factor

# Step 3: PCA with 1 factor

pc2<- principal(voi, nfactors = 1, rotate = "none")
pc2 #inspecting communalities (h2): 

pc2 <- principal(voi, nfactors = 1, rotate = "oblimin")
print.psych(pc2, cut = 0.3, sort = TRUE) 

## PCAs Predictors ----
### Trust in media ----
voi <- ds_MisInfoCT_datamanag%>%
  dplyr::select(starts_with("TrustMedia"))

#Step 1: Inspection of items
raqMatrix<-cor(voi) #correlation matrix
bartlett.test(voi) #bartlett test (significant)
kmo <- function(x)
{
  x <- subset(x, complete.cases(x)) # Omit missing values
  r <- cor(x) # Correlation matrix
  r2 <- r^2 # Squared correlation coefficients
  i <- solve(r) # Inverse matrix of correlation matrix
  d <- diag(i) # Diagonal elements of inverse matrix
  p2 <- (-i/sqrt(outer(d, d)))^2 # Squared partial correlation coefficients
  diag(r2) <- diag(p2) <- 0 # Delete diagonal elements
  KMO <- sum(r2)/(sum(r2)+sum(p2))
  MSA <- colSums(r2)/(colSums(r2)+colSums(p2))
  return(list(KMO=KMO, MSA=MSA))
}
kmo(voi) #MSA for all items good?

#Step 2: Number of factors
items.parallel <- fa.parallel(voi, fa="pc") #2 factors

pc1 <- principal(voi, nfactors = 10, rotate = "none")
plot(pc1$values, type = "b")
pc1 # 2 factors

# Step 3: PCA with 2 factors

pc2<- principal(voi, nfactors = 2, rotate = "none")
pc2 #inspecting communalities (h2): 

pc2 <- principal(voi, nfactors = 2, rotate = "oblimin")
print.psych(pc2, cut = 0.3, sort = TRUE) 

# Two factors with trust in news media and alternative media, trust in websites 
# of internet providers (5) excluded due to double-loading


### Media Use with social media ----
voi <- ds_MisInfoCT_datamanag%>%
  dplyr::select(starts_with("r_MediaUse"))

#Step 1: Inspection of items
raqMatrix<-cor(voi) #correlation matrix
bartlett.test(voi) #bartlett test (significant)
kmo <- function(x)
{
  x <- subset(x, complete.cases(x)) # Omit missing values
  r <- cor(x) # Correlation matrix
  r2 <- r^2 # Squared correlation coefficients
  i <- solve(r) # Inverse matrix of correlation matrix
  d <- diag(i) # Diagonal elements of inverse matrix
  p2 <- (-i/sqrt(outer(d, d)))^2 # Squared partial correlation coefficients
  diag(r2) <- diag(p2) <- 0 # Delete diagonal elements
  KMO <- sum(r2)/(sum(r2)+sum(p2))
  MSA <- colSums(r2)/(colSums(r2)+colSums(p2))
  return(list(KMO=KMO, MSA=MSA))
}
kmo(voi) #MSA for all items good?

#Step 2: Number of factors
items.parallel <- fa.parallel(voi, fa="pc") #3 factors

pc1 <- principal(voi, nfactors = 11, rotate = "none")
plot(pc1$values, type = "b")
pc1 # 2-3 factors

# Step 3: PCA with 3 factors

pc2<- principal(voi, nfactors = 3, rotate = "none")
pc2 #inspecting communalities (h2): 

pc2 <- principal(voi, nfactors = 3, rotate = "oblimin")
print.psych(pc2, cut = 0.3, sort = TRUE) 

# Result: 3 factors:
# Factor 1: Online News
# Factor 2: Traditional News
# Factor 3: Social Media and Messenger


### Alternative Media Use  ----
voi <- ds_MisInfoCT_datamanag%>%
  dplyr::select(starts_with("r_AltMed"))
head(voi)
#### Step 1: Inspection of items
# Correlation matrix
raqMatrix<-cor(voi) 
round(raqMatrix, 2)

# Sample adequate for PCA? KMO-Index
psych::KMO(raqMatrix)
# Kaiser’s 1974 recommendations were:
# bare minimum of .5
# values between .5 and .7 as mediocre
# values between .7 and .8 as good
# values above .9 are superb 

# Result: overall very good, individual items under .50: r_AltMed_Other
voi <- voi %>% 
  dplyr::select(-r_AltMed_Other)

# New correlation matrix
raqMatrix<-cor(voi) 
round(raqMatrix, 2)

# Sample adequate for PCA? KMO-Index
psych::KMO(raqMatrix)

# Correlations of the items large enough?
psych::cortest.bartlett(voi)
# Result: Bartlett's test is significant supporting a factor analytical approach

# Multicollinearity or singularity in the data?
det(raqMatrix)
# Result: Determinant greater than 0.00001

# Summary:
# Data screening was conducted to determine the suitability of the data for principal components analyses. 
# The Kaiser-Meyer-Olkin measure of sampling adequacy (KMO; Kaiser, 1970) ranges from 0.00 to 1.00; 
# indicating that the patterns of correlations are relatively compact, and that component analysis should yield distinct and reliable components (Field, 2012). 
# In our dataset, the KMO value was ??, indicating acceptable sampling adequacy. 
# The Barlett’s Test of Sphericity examines whether the population correlation matrix resembles an identity matrix (Field, 2012). 
# When the p value for the Bartlett’s test is < .05, we are fairly certain we have clusters of correlated variables. In our dataset,χ2(xxx) = xxx, p < .001
# indicating the correlations between items are sufficiently large enough for principal components analysis. 
# The determinant of the correlation matrix alerts us to any issues of multicollinearity or singularity and should be larger than 0.00001. 
# Our determinant was 0.0075, supporting the suitability of our data for analysis.

#### Step 2: Number of factors
pca1 <- psych::principal(voi, nfactors = length(voi), rotate = "none")
pca1
# Result: eigenvalue > 1.0 (i.e., “Kaiser’s”) criteria suggests 3 factors, Joliffe’s criteria > 0.7 suggests 6 factors  

plot(pca1$values, type = "b")
#### Result: Point of inflexion of the screeplot suggests 1 or 3 factors


#### Step 3a: PCA with 3 factors
# Inspecting communalities
pc2<- principal(voi, nfactors = 3, rotate = "none")
pc2 
# Kaiser's criterion of eigenvalues > 1 is accurate if 
# 1) the communalities (h2) are greater than .70
# Result: not applicable in this case
# 2) the average communality is >.60
mean(pc2$communality)
# Result: Communalities are lower than the thresholds for single and overall communalities 

# Inspecting the difference between reproduced correlation matrix and the correlation matrix of the data
round(psych::factor.model(pc2$loadings), 3) # produces the reproduced correlation matrix
round(psych::factor.residuals(raqMatrix, pc2$loadings), 3) # calculate residuals for each pair
pca2_resids <- psych::factor.residuals(raqMatrix, pc2$loadings) # extract the residuals
pca2_resids <- as.matrix(pca2_resids[upper.tri(pca2_resids)]) # the object has the residuals in a single column
head(pca2_resids) # display the first 6 rows of the residuals
large.resid <- abs(pca2_resids) > 0.05 # how many residuals are greater than an absolute value of 0.05?
sum(large.resid) # large.resid
round(sum(large.resid)/nrow(pca2_resids), 3)
# Result: 68 residuals are greater than the absoulte value of .05 representing 50% of the total number of residuals. 

# Not more than 50% of the residuals should be greater than .05
round(sqrt(mean(pca2_resids^2)), 3)
# Result: mean of residuals is .074
# recommendation is to consider extracting more components if the value is higher than 0.08 (Field, 2012)
hist(pca2_resids)
# Histogram of residuals should be normally distributed
# Result: reasonably normally distributed


#### Step 3b: PCA with 1 factor
# Inspecting communalities
pc2<- principal(voi, nfactors = 1, rotate = "none")
pc2 
# Kaiser's criterion of eigenvalues > 1 is accurate if 
# 1) the communalities (h2) are greater than .70
# Result:  applicable in this case for most items
# 2) the average communality is >.60
mean(pc2$communality)
# Result: Communalities are lower than the thresholds for single and overall communalities 

# Inspecting the difference between reproduced correlation matrix and the correlation matrix of the data
round(psych::factor.model(pc2$loadings), 3) # produces the reproduced correlation matrix
round(psych::factor.residuals(raqMatrix, pc2$loadings), 3) # calculate residuals for each pair
pca2_resids <- psych::factor.residuals(raqMatrix, pc2$loadings) # extract the residuals
pca2_resids <- as.matrix(pca2_resids[upper.tri(pca2_resids)]) # the object has the residuals in a single column
head(pca2_resids) # display the first 6 rows of the residuals
large.resid <- abs(pca2_resids) > 0.05 # how many residuals are greater than an absolute value of 0.05?
sum(large.resid) # large.resid
round(sum(large.resid)/nrow(pca2_resids), 3)
# Result: 69 residuals are greater than the absoulte value of .05 representing 51% of the total number of residuals. 
# Not more than 50% of the residuals should be greater than .05
round(sqrt(mean(pca2_resids^2)), 3)
# Result: mean of residuals is .071
# recommendation is to consider extracting more components if the value is higher than 0.08 (Field, 2012)
hist(pca2_resids)
# Histogram of residuals should be normally distributed
# Result: not normally distributed

#### Result: One factor might be enough

#### Step 4: Rotated PCA
##### 1 factor
# compute pca
pc2 <- principal(voi, nfactors = 1, rotate = "oblimin")
print.psych(pc2, cut = 0.3, sort = TRUE) 

# show pattern matrix
pc2_table <- round(pc2$loadings, 3)
write.table(pc2_table, file = "_tables/pca_AltMedL_table_1factor.csv", sep = ",", col.names = TRUE,
            row.names = FALSE)
pc2_table

# Since there's only one factor, the pattern matrix is the same as the structure matrix


##### 3 factors
pc2 <- principal(voi, nfactors = 3, rotate = "oblimin")
print.psych(pc2, cut = 0.3, sort = TRUE) 

pcaOBL_table <- round(pc2$loadings, 3)
write.table(pcaOBL_table, file = "_tables/pca_AltMedL_table_3factors.csv", sep = ",", col.names = TRUE,
            row.names = FALSE)
pcaOBL_table

# show pattern matrix
pc2_table <- round(pc2$loadings, 3)
write.table(pc2_table, file = "_tables/pca_AltMedL_table_1factor.csv", sep = ",", col.names = TRUE,
            row.names = FALSE)
pc2_table

# show structure matrix (takes into account the relationship between the components/scales
# it is a product of the pattern matrix and the matrix containing the correlation coefficients between the components/scales)
# names(pcaOBL)
pc2$loadings %*% pc2$Phi

# Field's function to produce the structure matrix
factor.structure <- function(fa, cut = 0.2, decimals = 2) {
  structure.matrix <- psych::fa.sort(fa$loadings %*% fa$Phi)
  structure.matrix <- data.frame(ifelse(abs(structure.matrix) < cut,
                                        "", round(structure.matrix, decimals)))
  return(structure.matrix)
}

factor.structure(pc2, cut = 0.3)
# compare with pattern matrix
pc2_table
# Result: some values changed but the component membership of the items was stable

##### Result: stay with one factor because of the scree plot
# Four criteria were used to determine the number of components to extract: a priori theory,
# the scree plot, the eigenvalue-greater-than-one criteria, and the interpretability of the solution.

--------------------------------------------------------------------------------
# --------------------------------------------------------------------------------
# SET UP RELEVANT VARIABLES AND CHECK RELIABILITY ----
# --------------------------------------------------------------------------------
## Sociodemographics ---------------------------------------------------------------------------------------
### Gender ------------------------------------------------------
freq(ds_MisInfoCT_datamanag$gender)
ds_MisInfoCT_datamanag <- ds_MisInfoCT_datamanag %>% mutate(gender, gender_male = ifelse(gender == "männlich", 
                                                                     1, 
                                                                     0))
freq(ds_MisInfoCT_datamanag$gender_male)
ds_MisInfoCT_datamanag <- ds_MisInfoCT_datamanag %>% mutate(gender_male = as.factor(gender_male))

#ds_MisInfoCT_datamanag$gender_male <- factor(ds_MisInfoCT_datamanag$gender_male,
#                                                  levels = c(0, 1),
#                                                  labels = c("Female", "Male"))
#freq(ds_MisInfoCT_datamanag$gender_male)

### Education ------------------------------------------------------
ds_MisInfoCT_datamanag <- ds_MisInfoCT_datamanag %>% mutate(edu_high = 
                                          case_when(Education == "kein Abschluss" ~ 0,
                                                    Education == "Haupt-/Volksschulabschluss" ~ 0,
                                                    Education == "Mittlere Reife, Realschulabschluss, Fachschulreife" ~ 0,
                                                    Education == "Abschluss der Polytechnischen Oberschule (8./10. Klasse)" ~ 0,
                                                    Education == "Fachhochschulreife, Abschluss einer Fachoberschule" ~ 0,
                                                    Education == "Abitur, allgemeine oder fachgebundene Hochschulreife" ~ 1,
                                                    Education == "Fach-/Hochschulstudium" ~ 1,
                                                    Education == "anderer Schulabschluss" ~ NA
                                          ))
freq(ds_MisInfoCT_datamanag$edu_high)
ds_MisInfoCT_datamanag <- ds_MisInfoCT_datamanag %>% mutate(edu_high = as.factor(edu_high))
freq(ds_MisInfoCT_datamanag$edu_high)

## COVID Misinfo ----
### Reliability ----
psych::alpha(
  ds_MisInfoCT_datamanag[, c(
    "Misinfo_COVID_Mask_dummy",
    "Misinfo_COVID_RNA_dummy", 
    "Misinfo_COVID_Flue_dummy",
    "Misinfo_COVID_Coke_dummy", 
    "Misinfo_COVID_Misca_dummy"
  )]
)

### Compute indices ----
#### Sum index ----
ds_MisInfoCT_datamanag %<>% 
  mutate(
    misinfo_covid_beliefs = rowSums(
      dplyr::select(
        ds_MisInfoCT_datamanag, 
        Misinfo_COVID_Mask_dummy,
        Misinfo_COVID_RNA_dummy,
        Misinfo_COVID_Flue_dummy,
        Misinfo_COVID_Coke_dummy,
        Misinfo_COVID_Misca_dummy
      ), na.rm = TRUE
    )
  ) %>% 
  var_labels(misinfo_covid_beliefs = "Belief in COVID misinfo (sum)")

# Fix NaNs
ds_MisInfoCT_datamanag$misinfo_covid_beliefs[is.nan(ds_MisInfoCT_datamanag$misinfo_covid_beliefs)] <- NA

#### Dichotomous index ----
ds_MisInfoCT_datamanag %<>% 
  mutate(
    misinfo_covid_beliefs_dummy = case_when(
      misinfo_covid_beliefs >= 1 ~ 1,
      misinfo_covid_beliefs == 0 ~ 0,
      TRUE ~ NA_real_
    ),
    misinfo_covid_beliefs_dummy = as.factor(misinfo_covid_beliefs_dummy)
  ) %>% 
  var_labels(misinfo_covid_beliefs_dummy = "Belief in COVID misinfo (dichotomous)")

### Verify changes ----
head(ds_MisInfoCT_datamanag)

descr::freq(ds_MisInfoCT_datamanag$misinfo_covid_beliefs)
descr::freq(ds_MisInfoCT_datamanag$misinfo_covid_beliefs_dummy)
typeof(ds_MisInfoCT_datamanag$misinfo_covid_beliefs_dummy)

## COVID CTB ----
### Reliability ----
psych::alpha(
  ds_MisInfoCT_datamanag[, c(
    "Consp_systemic_scientific_COVID_5G_dummy",
    "Consp_systemic_scientific_COVID_tracking_dummy",
    "Consp_systemic_scientific_COVID_Gates_dummy",
    "Consp_systemic_scientific_COVID_rights_dummy",
    "Consp_systemic_scientific_COVID_Biow_dummy"
  )]
)

### Compute indices ----
#### Sum index ----
ds_MisInfoCT_datamanag %<>% 
  mutate(
    COVID_CTB = rowSums(
      dplyr::select(
        ds_MisInfoCT_datamanag, 
        Consp_systemic_scientific_COVID_5G_dummy,
        Consp_systemic_scientific_COVID_tracking_dummy,
        Consp_systemic_scientific_COVID_Gates_dummy,
        Consp_systemic_scientific_COVID_rights_dummy,
        Consp_systemic_scientific_COVID_Biow_dummy
      ), na.rm = TRUE
    )
  ) %>% 
  var_labels(COVID_CTB = "Belief in COVID-CT (sum)")

# Fix NaNs
ds_MisInfoCT_datamanag$COVID_CTB[is.nan(ds_MisInfoCT_datamanag$COVID_CTB)] <- NA

#### Dichotomous index ----
ds_MisInfoCT_datamanag %<>% 
  mutate(
    COVID_CTB_dummy = case_when(
      COVID_CTB >= 1 ~ 1,
      COVID_CTB == 0 ~ 0,
      TRUE ~ NA_real_
    ),
    COVID_CTB_dummy = as.factor(COVID_CTB_dummy)
  ) %>% 
  var_labels(COVID_CTB_dummy = "Belief in COVID CT (dichotomous)")

### Verify changes ----
head(ds_MisInfoCT_datamanag)

descr::freq(ds_MisInfoCT_datamanag$COVID_CTB_dummy)
descr::freq(ds_MisInfoCT_datamanag$COVID_CTB)
typeof(ds_MisInfoCT_datamanag$COVID_CTB_dummy)

## Predictors ----
### Reliability ----
calculate_alpha <- function(variables) {
  relevant_variables <- ds_MisInfoCT_datamanag[, variables]
  psych::alpha(relevant_variables)
}

#### Conspiracy Mentality ----
calculate_alpha(c("ConspMentality_1", "ConspMentality_2", "ConspMentality_3", "ConspMentality_4", "ConspMentality_5")) 

#### Epistemic Beliefs - Truth is political  ----
calculate_alpha(c("EpistBel_Truth_1", "EpistBel_Truth_2", "EpistBel_Truth_3", "EpistBel_Truth_4"))

#### Epistemic Beliefs - Intuitive Thinking ----
calculate_alpha(c("EpistBel_IT_1", "EpistBel_IT_2", "EpistBel_IT_3", "EpistBel_IT_4", "EpistBel_IT_5", "EpistBel_IT_6"))

#### Epistemic Beliefs - Actively Open-Minded Thinking ----
calculate_alpha(c("EpistBel_AOT_1", "EpistBel_AOT_2", "EpistBel_AOT_3", "EpistBel_AOT_4", "EpistBel_AOT_5", "EpistBel_AOT_6"))

#### Social Dominance Orientation ----
calculate_alpha(c("SDO1_r", "SDO2", "SDO3_r", "SDO4"))

#### Trust in Legacy Media ----
calculate_alpha(c("TrustMedia_1", "TrustMedia_2", "TrustMedia_3", "TrustMedia_4"))

#### Trust in Alternative News Channels ----
calculate_alpha(c("TrustMedia_6", "TrustMedia_7", "TrustMedia_8", "TrustMedia_9","TrustMedia_10"))

#### Media Use Factor 1: Traditional ----
calculate_alpha(c("r_MediaUse_TV", "r_MediaUse_NewsTV","r_MediaUse_Radio","r_MediaUse_Papers","r_MediaUse_Magaz"))

typeof(ds_MisInfoCT_datamanag$r_MediaUse_TV)
ds_MisInfoCT_datamanag$r_MediaUse_TV<-as.numeric(ds_MisInfoCT_datamanag$r_MediaUse_TV)
ds_MisInfoCT_datamanag$r_MediaUse_NewsTV<-as.numeric(ds_MisInfoCT_datamanag$r_MediaUse_NewsTV)
ds_MisInfoCT_datamanag$r_MediaUse_Radio<-as.numeric(ds_MisInfoCT_datamanag$r_MediaUse_Radio)
ds_MisInfoCT_datamanag$r_MediaUse_Papers<-as.numeric(ds_MisInfoCT_datamanag$r_MediaUse_Papers)
ds_MisInfoCT_datamanag$r_MediaUse_Magaz<-as.numeric(ds_MisInfoCT_datamanag$r_MediaUse_Magaz)

typeof(ds_MisInfoCT_datamanag$r_MediaUse_TV)
descr::freq(ds_MisInfoCT_datamanag$r_MediaUse_TV)

selected_items <- ds_MisInfoCT_datamanag[, c("r_MediaUse_TV", "r_MediaUse_NewsTV", "r_MediaUse_Radio", "r_MediaUse_Papers", "r_MediaUse_Magaz")]
alpha(selected_items)

#### Media Use Factor 2: Online ----
calculate_alpha(c("r_MediaUse_WebNewsp","r_MediaUse_WebMag","r_MediaUse_WebBroadcast","r_MediaUse_WebOther"))
#### Media Use Factor 3: Social Media ----
calculate_alpha(c("r_MediaUse_SoMe", "r_MediaUse_Messenger"))
## Compute indices ----
# Replace all -1 with NA in columns starting with specified prefixes
freq(ds_MisInfoCT_datamanag$ConspMentality_1)
ds_MisInfoCT_datamanag <- ds_MisInfoCT_datamanag %>%
  mutate(across(starts_with("ConspMentality"), ~ ifelse(. == -1, NA, .)),
         across(starts_with("EpistBel"), ~ ifelse(. == -1, NA, .)),
         across(starts_with("SDO"), ~ ifelse(. == -1, NA, .)),
         across(starts_with("TrustMedia"), ~ ifelse(. == -1, NA, .)),
         across(starts_with("TrustPolitical"), ~ ifelse(. == -1, NA, .))
  )

freq(ds_MisInfoCT_datamanag$ConspMentality_1)
freq(ds_MisInfoCT_datamanag$TrustPolitical_1)
freq(ds_MisInfoCT_datamanag$r_AltMed_RT)


# Compute indices 
library(sjlabelled)

ds_MisInfoCT_datamanag <- ds_MisInfoCT_datamanag %>%
  mutate(
    ConspMent_index = rowMeans(dplyr::select(., starts_with("ConspMentality")), na.rm = TRUE),
    EpistBel_Truth_index = rowMeans(dplyr::select(., starts_with("EpistBel_Truth")), na.rm = TRUE),
    EpistBel_IT_index = rowMeans(dplyr::select(., starts_with("EpistBel_IT")), na.rm = TRUE),
    EpistBel_AOT_index = rowMeans(dplyr::select(., starts_with("EpistBel_AOT")), na.rm = TRUE),
    SDO_index = rowMeans(dplyr::select(., starts_with("SDO")), na.rm = TRUE),
    TrustMedia_index = rowMeans(dplyr::select(., c("TrustMedia_1", "TrustMedia_2", "TrustMedia_3", "TrustMedia_4")), na.rm = TRUE),
    TrustAltMed_index = rowMeans(dplyr::select(., c("TrustMedia_6", "TrustMedia_7", "TrustMedia_8", "TrustMedia_9", "TrustMedia_10")), na.rm = TRUE),
    TrustPolitical_index = rowMeans(dplyr::select(., starts_with("TrustPolitical")), na.rm = TRUE),
    MediaUse_traditional_index = rowSums(dplyr::select(., c("r_MediaUse_TV", "r_MediaUse_NewsTV", "r_MediaUse_Radio", "r_MediaUse_Papers","r_MediaUse_Magaz")), na.rm = TRUE),
    MediaUse_online_index = rowSums(dplyr::select(., c("r_MediaUse_WebNewsp", "r_MediaUse_WebMag", "r_MediaUse_WebBroadcast", "r_MediaUse_WebOther")), na.rm = TRUE),
    MediaUse_socialMedia_index = rowSums(dplyr::select(., c("r_MediaUse_SoMe", "r_MediaUse_Messenger")), na.rm = TRUE),
    AltMed_index = rowSums(dplyr::select(., starts_with("r_AltMed")), na.rm = TRUE),
  ) %>%
  var_labels(
    ConspMent_index = "Conspiracy Mentality (mean)",
    EpistBel_Truth_index = "Epistemic Beliefs Truth is political (mean)",
    EpistBel_IT_index = "Epistemic Beliefs Intuitive thinking (mean)",
    EpistBel_AOT_index = "Epistemic Beliefs Open minded thinking (mean)",
    SDO_index = "Social Dominance Orientation (mean)",
    TrustMedia_index = "Trust in News Media (mean)",
    TrustAltMed_index = "Trust in Alternative Media (mean)",
    TrustPolitical_index = "Trust in Politics (mean)",
    MediaUse_traditional_index = "Media Use Traditional (sum)",
    MediaUse_online_index = "Media Use Online (sum)",
    MediaUse_socialMedia_index = "Social Media Use (sum)",
    AltMed_index = "Alternative media use (sum)"
  )


# Fix NaNs for all indices
vars_to_fix <- c("ConspMent_index", 
                 "EpistBel_Truth_index", "EpistBel_IT_index","EpistBel_AOT_index",
                 "SDO_index", 
                 "TrustMedia_index","TrustAltMed_index",
                 "TrustPolitical_index",
                 "MediaUse_traditional_index","MediaUse_online_index","MediaUse_socialMedia_index",
                 "AltMed_index")

for (var in vars_to_fix) {
  ds_MisInfoCT_datamanag[[var]][is.nan(ds_MisInfoCT_datamanag[[var]])] <- NA
}

# Check results
ds_MisInfoCT_datamanag %>% dplyr::select (starts_with("ConspMent"))  %>% head()
ds_MisInfoCT_datamanag %>% dplyr::select (starts_with("EpistBel_Truth"))  %>% head()
ds_MisInfoCT_datamanag %>% dplyr::select (starts_with("EpistBel_IT"))  %>% head()
ds_MisInfoCT_datamanag %>% dplyr::select (starts_with("EpistBel_AOT"))  %>% head()
ds_MisInfoCT_datamanag %>% dplyr::select (starts_with("SDO"))  %>% head()
ds_MisInfoCT_datamanag %>% dplyr::select (TrustMedia_1, TrustMedia_2, TrustMedia_3, TrustMedia_4,TrustMedia_5,TrustMedia_index)  %>% head()
ds_MisInfoCT_datamanag %>% dplyr::select (TrustMedia_6, TrustMedia_7, TrustMedia_8, TrustMedia_9,TrustMedia_9,TrustMedia_10,TrustAltMed_index)  %>% head()
ds_MisInfoCT_datamanag %>% dplyr::select (starts_with("TrustPolitical"))  %>% head()
ds_MisInfoCT_datamanag %>% dplyr::select (MediaUse_TV, MediaUse_NewsTV, MediaUse_Radio, MediaUse_Papers, MediaUse_Magaz, MediaUse_traditional_index)  %>% head()
ds_MisInfoCT_datamanag %>% dplyr::select (MediaUse_WebNewsp, MediaUse_WebMag, MediaUse_WebBroadcast, MediaUse_WebOther, MediaUse_online_index)  %>% head()
ds_MisInfoCT_datamanag %>% dplyr::select (MediaUse_SoMe, MediaUse_Messenger, MediaUse_socialMedia_index)  %>% head()
ds_MisInfoCT_datamanag %>% dplyr::select (starts_with("AltMed"))  %>% head()

# Show descriptives
ds_MisInfoCT_datamanag %>% dplyr::select(ConspMent_index,
                               EpistBel_Truth_index,EpistBel_IT_index,EpistBel_AOT_index,
                               SDO_index,
                               TrustMedia_index,TrustAltMed_index,
                               TrustPolitical_index,
                               MediaUse_traditional_index,MediaUse_online_index,MediaUse_socialMedia_index,
                               AltMed_index) %>% descr()



# --------------------------------------------------------------------------------
# CREATE FINAL DATASET ----
# --------------------------------------------------------------------------------
## Select relevant items for analysis ----
ds_MisInfoCT_analysis <- ds_MisInfoCT_datamanag %>%
  dplyr::select(id,
                COVID_CTB,
                COVID_CTB_dummy,
                misinfo_covid_beliefs,
                misinfo_covid_beliefs_dummy,
                gender_male, 
                age, 
                edu_high,
                polorientation, 
                TrustMedia_index, 
                TrustAltMed_index,
                TrustScience, 
                religiousity, 
                ConspMent_index,
                EpistBel_Truth_index,
                EpistBel_IT_index,
                EpistBel_AOT_index,
                SDO_index,
                TrustMedia_index,
                TrustPolitical_index,
                MediaUse_traditional_index,
                MediaUse_online_index,
                MediaUse_socialMedia_index,
                AltMed_index)

## Labeling variables ----
ds_MisInfoCT_analysis %<>%
  var_labels(
    age = "Age",
    gender_male = "male",
    edu_high = "Highly educated",
    TrustScience = "Trust in Science", 
    polorientation = "Political Orientation", 
    COVID_CTB_dummy = "Belief in COVID-CT (dummy)",
    misinfo_covid_beliefs_dummy	= "Belief in COVID misinfo (dummy)"
  )

## Print list of variables ----
variablelist<-data.frame(Variable = attr(ds_MisInfoCT_analysis, "names"),
                         Label = as.vector(get_label(ds_MisInfoCT_analysis))) %>% htmlTable(align = "l")

writeLines(as.character(variablelist), "_tables/variable_list_analysis.html")
variablelist

# Check for proper types of variables ---
glimpse(ds_MisInfoCT_analysis)
ds_MisInfoCT_analysis$religiousity <- as.numeric(ds_MisInfoCT_analysis$religiousity)

## Save as RDS ----
saveRDS(ds_MisInfoCT_analysis, "published/ds_analysis_MisinfoCT.rds")

# --------------------------------------------------------------------------------
# EXAMINE MISSING VALUES ----
# --------------------------------------------------------------------------------
## Numerical inspection of missings ----
miss<-miss_var_summary(ds_MisInfoCT_analysis)
View(miss) # summary of missing values 

n_miss <- n_miss(ds_MisInfoCT_analysis) 
n_miss #total number of missing values

pct_miss(ds_MisInfoCT_analysis) #Percentage of missing values in the whole dataset

pctmiss_cas<- pct_miss_case(ds_MisInfoCT_analysis) 
pctmiss_cas #pct of cases with missing values

miss_n <- apply(ds_MisInfoCT_analysis, MARGIN = 1, function(x) sum(is.na(x)))
non_miss_n <- apply (ds_MisInfoCT_analysis, MARGIN = 1, function(x) sum(!is.na(x)))
ds_MisInfoCT_analysis$miss_p = round ((miss_n/non_miss_n),2) #safe prop_miss as additional row

## Visual inspection of missings ----
ds_MisInfoCT_analysis %>% 
  missing_plot

# Overall, 1.03 % of values were missing, however, share of missings differed largely between variables
# number of missings for trust in science and income were above the ignorable threshold (5%, see Kline 1998)

## Test for mcar  ----
# only variables with missing values
voi_mcar = c("TrustScience","TrustMedia_index","religiousity","SDO_index","TrustAltMed_index",
             "EpistBel_Truth_index","ConspMent_index","EpistBel_AOT_index",
             "EpistBel_IT_index","TrustPolitical_index","polorientation")

ds_MisInfoCT_analysis %>% 
  dplyr::select(voi_mcar) %>% 
  MissMech::TestMCARNormality()
## Result: non-parametric test did not reject the null-hypotheses that missing values were completely at random ----

## Impute data using the mice-package ----
# The mice package implements a multiple imputation methods for multivariate missing data. It can impute mixes of continuous, 
# binary, unordered categorical and ordered categorical data, as well as two-level data. 
#  original article describing the software, as well as the source package (Buuren and Groothuis-Oudshoorn 2011)
library(dplyr)
library(mice)

# Select variables with missing data and id as identifier
imp <- ds_MisInfoCT_analysis %>% 
  dplyr::select(voi_mcar, id)

# Impute 5 times
mice_mice <- mice(data = imp, m = 5)
summary(mice_mice)

# delete columns created by mice
completedData <- complete(mice_mice, 'long')
completedData <- completedData %>% dplyr::select(-.id,-.imp)

# take average of the imputed values using id as identifier
a <- aggregate(. ~ id, data = completedData, FUN = mean)
head(a)

# Identify the columns to replace in the original dataset
cols_to_replace <- c("TrustScience", "TrustMedia_index", "religiousity", "SDO_index", 
                     "TrustAltMed_index", "EpistBel_Truth_index",  
                     "ConspMent_index", "EpistBel_AOT_index", 
                     "EpistBel_IT_index", "TrustPolitical_index","polorientation")

# Replace values in ds_MisInfoCT_analysis with values from dataset a
ds_MisInfoCT_analysis[cols_to_replace] <- a[match(ds_MisInfoCT_analysis$id, a$id), cols_to_replace]

# check for remaining missings
miss<-miss_var_summary(ds_MisInfoCT_analysis)
View(miss)
## Check structure and data types ---- 
skimr::skim(ds_MisInfoCT_analysis)

ds_MisInfoCT_analysis <- ds_MisInfoCT_analysis %>%
  mutate(
    gender_male = as.factor(gender_male),
    edu_high = as.factor(edu_high)
  )
skimr::skim(ds_MisInfoCT_analysis)

# Assign labels to each variable
ds_MisInfoCT_analysis <- ds_MisInfoCT_analysis %>%
  mutate(COVID_CTB_dummy = set_label(COVID_CTB_dummy, "COVID Conspiracy Theory Belief (binary)"),
         misinfo_covid_beliefs_dummy = set_label(misinfo_covid_beliefs_dummy, "Misinformation COVID Beliefs (binary)"),
         COVID_CTB = set_label(COVID_CTB, "COVID Conspiracy Theory Belief (continuous)"),
         misinfo_covid_beliefs = set_label(misinfo_covid_beliefs, "Misinformation COVID Beliefs (continuous)"),
         ConspMent_index = set_label(ConspMent_index, "Conspiracy Mentality"),
         MediaUse_traditional_index = set_label(MediaUse_traditional_index, "Traditional Media Use"),
         MediaUse_online_index = set_label(MediaUse_online_index, "Online Media Use"),
         MediaUse_socialMedia_index = set_label(MediaUse_socialMedia_index, "Social Media Use Total"),
         AltMed_index = set_label(AltMed_index, "Alternative Media"),
         EpistBel_IT_index = set_label(EpistBel_IT_index, "Intuitive Thinking"),
         EpistBel_Truth_index = set_label(EpistBel_Truth_index, "Truth is Political"),
         EpistBel_AOT_index = set_label(EpistBel_AOT_index, "Actively Open-Minded Thinking"),
         SDO_index = set_label(SDO_index, "Social Dominance Orientation"),
         TrustMedia_index = set_label(TrustMedia_index, "Trust in Media"),
         TrustAltMed_index = set_label(TrustAltMed_index, "Trust in Alternative Media"),
         TrustScience = set_label(TrustScience, "Trust in Science"),
         TrustPolitical_index = set_label(TrustPolitical_index, "Trust in Politics"),
         gender_male = set_label(gender_male, "Gender Male"),
         age = set_label(age, "Age"),
         edu_high = set_label(edu_high, "Education High"),
         religiousity = set_label(religiousity, "Religiosity"),
         polorientation = set_label(polorientation, "Political Orientation"))
# Check the labels to ensure they are correctly assigned
str(ds_MisInfoCT_analysis)

ds_MisInfoCT_analysis <- dplyr::select(ds_MisInfoCT_analysis, -miss_p)

## Save as RDS ----
saveRDS(ds_MisInfoCT_analysis, "_data/ds_MisInfoCT_analysis_imputed.rds")

# --------------------------------------------------------------------------------
# CORRELATIONS ----
# --------------------------------------------------------------------------------
## Data wrangling ----
ds_corr <- ds_MisInfoCT_analysis
skimr::skim(ds_corr)
str(ds_corr)

# --------------------------------------------------------------------------------
## Full interactive correlation table with descriptives (numeric and factor) using DT ----
library(dplyr)
library(sjlabelled)
library(Hmisc)
library(tidyr)
library(DT)
library(tibble)


### Step 1: Compute Descriptives for Numeric Variables ----
numeric_vars <- ds_corr %>%
  dplyr::select_if(is.numeric) %>%
  names()

summary_df <- ds_corr %>%
  dplyr::select(all_of(numeric_vars)) %>%
  summarise(across(everything(), list(
    mean = mean,
    sd = sd,
    min = min,
    max = max
  ), na.rm = TRUE)) %>%
  pivot_longer(cols = everything(), names_to = "variable", values_to = "value")
print(summary_df)

# Extract variable name, measure, and value
summary_df <- summary_df %>%
  mutate(variable_name = sub("_(mean|sd|min|max)$", "", variable),
         measure = sub(".*_(mean|sd|min|max)$", "\\1", variable)) %>%
  pivot_wider(names_from = measure,
              values_from = value)

# Group by variable_name and summarize to get one row per variable
summary_df <- summary_df %>%
  group_by(variable_name) %>%
  summarise(mean = round(mean(mean, na.rm = TRUE), 2),  # Round mean to 2 decimal places
            sd = round(mean(sd, na.rm = TRUE), 2),      # Round sd to 2 decimal places
            min = mean(min, na.rm = TRUE),
            max = mean(max, na.rm = TRUE))

print(summary_df,n=50)

### Step 2: Convert Factor Variables to Numeric ----
ds_corr <- ds_corr %>%
  mutate_if(~ is.factor(.) && all(levels(.) %in% c("0", "1")), ~ as.numeric(.) - 1)

### Step 3: Compute Percentages for Character Variables ----
character_variables <- ds_corr %>%
  dplyr::select(COVID_CTB_dummy,misinfo_covid_beliefs_dummy,gender_male,edu_high
  )

percentages <- character_variables %>%
  summarise_all(~ mean(. == 1) * 100) %>%
  round(2)

percentages_df <- as.data.frame(t(percentages))
colnames(percentages_df) <- "% of the sample"
percentages_df <- percentages_df %>%
  rownames_to_column(var = "variable")
percentages_df

### Step 4: Create Correlation Matrix ----
cor_results <- Hmisc::rcorr(as.matrix(ds_corr %>% dplyr::select(where(is.numeric))))
cor_matrix <- cor_results$r
p_values <- cor_results$P
cor_matrix <- round(cor_matrix, 2)

cor_matrix <- ifelse(p_values < 0.001, paste0(cor_matrix, "***"), cor_matrix)
cor_matrix <- ifelse(p_values >= 0.001 & p_values < 0.01, paste0(cor_matrix, "** "), cor_matrix)
cor_matrix <- ifelse(p_values >= 0.01 & p_values < 0.05, paste0(cor_matrix, "*  "), cor_matrix)
cor_matrix <- ifelse(p_values >= 0.05, paste0(cor_matrix, "   "), cor_matrix)
diag(cor_matrix) <- ""
cor_table <- as.data.frame(cor_matrix)
str(cor_table)

### Step 5: Combine Descriptive Statistics and Percentages ----
merged_data <- merge(summary_df, percentages_df, by.x = "variable_name", by.y = "variable", all = TRUE)
colnames(merged_data)[1] <- "variable"
merged_data[is.na(merged_data)] <- ""
merged_data
str(merged_data)

### Step 6: Combine Correlation Matrix with Descriptive Statistics and Percentages ----
cor_table$variable <- rownames(cor_table)
rownames(cor_table) <- NULL  # Remove row names
merged_dataset <- merge(cor_table, merged_data, by = "variable", all.x = TRUE)
merged_dataset


desired_order <- c("variable", "mean", "sd", "min", "max", "% of the sample", setdiff(names(merged_dataset), c("variable", "mean", "sd", "min", "max", "% of the sample")))
merged_dataset <- merged_dataset[, desired_order]
merged_dataset

### Step 7: Rename Columns and rows ----
merged_dataset <- merged_dataset %>%
  rename(
    "COVID Conspiracy Belief (continuous)" = COVID_CTB,
    "COVID Conspiracy Belief (binary)" = COVID_CTB_dummy,
    "COVID Misinformation Belief (continuous)" = misinfo_covid_beliefs,
    "COVID Misinformation Belief (binary)" = misinfo_covid_beliefs_dummy,
    "Male" = gender_male,
    "Age" = age,
    "High Education" = edu_high,
    "Political Orientation" = polorientation,
    "Trust Media" = TrustMedia_index,
    "Trust Alternative Media" = TrustAltMed_index,
    "Trust in Science" = TrustScience,
    "Religiosity" = religiousity,
    "Conspiracy Mentality" = ConspMent_index,
    "Truth is political" = EpistBel_Truth_index,
    "Intuitive Thinking" = EpistBel_IT_index,
    "Actively Open-Minded Thinking" = EpistBel_AOT_index,
    "Social Dominance Orientation" = SDO_index,
    "Political Trust" = TrustPolitical_index,
    "Traditional Media Use" = MediaUse_traditional_index,
    "Online Media Use" = MediaUse_online_index,
    "Social Media Use Total" = MediaUse_socialMedia_index,
    "Alternative Media Use" = AltMed_index
  )
view(merged_dataset)

# Rename rows
merged_dataset$variable <- gsub("COVID_CTB_dummy", "COVID Conspiracy Belief (binary)", merged_dataset$variable)
merged_dataset$variable <- gsub("COVID_CTB", "COVID Conspiracy Belief (continuous)", merged_dataset$variable)
merged_dataset$variable <- gsub("misinfo_covid_beliefs_dummy", "COVID Misinformation Belief (binary)", merged_dataset$variable)
merged_dataset$variable <- gsub("misinfo_covid_beliefs", "COVID Misinformation Belief (continuous)", merged_dataset$variable)
merged_dataset$variable <- gsub("gender_male", "Male", merged_dataset$variable)
merged_dataset$variable <- gsub("age", "Age", merged_dataset$variable)
merged_dataset$variable <- gsub("edu_high", "High Education", merged_dataset$variable)
merged_dataset$variable <- gsub("polorientation", "Political Orientation", merged_dataset$variable)
merged_dataset$variable <- gsub("TrustMedia_index", "Trust Media", merged_dataset$variable)
merged_dataset$variable <- gsub("TrustAltMed_index", "Trust Alternative Media", merged_dataset$variable)
merged_dataset$variable <- gsub("TrustScience", "Trust in Science", merged_dataset$variable)
merged_dataset$variable <- gsub("religiousity", "Religiosity", merged_dataset$variable)
merged_dataset$variable <- gsub("ConspMent_index", "Conspiracy Mentality", merged_dataset$variable)
merged_dataset$variable <- gsub("EpistBel_Truth_index", "Truth is political", merged_dataset$variable)
merged_dataset$variable <- gsub("EpistBel_IT_index", "Intuitive Thinking", merged_dataset$variable)
merged_dataset$variable <- gsub("EpistBel_AOT_index", "Actively Open-Minded Thinking", merged_dataset$variable)
merged_dataset$variable <- gsub("SDO_index", "Social Dominance Orientation", merged_dataset$variable)
merged_dataset$variable <- gsub("TrustPolitical_index", "Political Trust", merged_dataset$variable)
merged_dataset$variable <- gsub("MediaUse_traditional_index", "Traditional Media Use", merged_dataset$variable)
merged_dataset$variable <- gsub("MediaUse_online_index", "Online Media Use", merged_dataset$variable)
merged_dataset$variable <- gsub("MediaUse_socialMedia_index", "Social Media Use", merged_dataset$variable)
merged_dataset$variable <- gsub("AltMed_index", "Alternative Media Use", merged_dataset$variable)
view(merged_dataset)

# Resort columns and rows
merged_dataset <- merged_dataset[, c(
  "variable", 
  "mean",
  "sd",
  "min",
  "max",
  "% of the sample",
  "COVID Conspiracy Belief (binary)",
  "COVID Misinformation Belief (binary)",
  "COVID Conspiracy Belief (continuous)",
  "COVID Misinformation Belief (continuous)",
  "Truth is political",
  "Intuitive Thinking",
  "Actively Open-Minded Thinking",
  "Conspiracy Mentality",
  "Social Dominance Orientation",
  "Political Orientation",
  "Religiosity",
  "Traditional Media Use",
  "Online Media Use",
  "Social Media Use Total",
  "Alternative Media Use",
  "Trust Alternative Media",
  "Trust Media",
  "Trust in Science",
  "Political Trust",
  "Male",
  "Age",
  "High Education"
)]

row_order <- c(
  "COVID Conspiracy Belief (binary)",
  "COVID Misinformation Belief (binary)",
  "COVID Conspiracy Belief (continuous)",
  "COVID Misinformation Belief (continuous)",
  "Truth is political",
  "Intuitive Thinking",
  "Actively Open-Minded Thinking",
  "Conspiracy Mentality",
  "Social Dominance Orientation",
  "Political Orientation",
  "Religiosity",
  "Traditional Media Use",
  "Online Media Use",
  "Social Media Use",
  "Alternative Media Use",
  "Trust Alternative Media",
  "Trust Media",
  "Trust in Science",
  "Political Trust",
  "Male",
  "Age",
  "High Education"
)

merged_dataset <- merged_dataset[match(row_order, merged_dataset$variable), ]


view(merged_dataset)


### Step 8: Display the Final Table ----
library(DT)

datatable(
  merged_dataset,
  extensions = 'FixedColumns',
  options = list(
    scrollX = TRUE,
    fixedColumns = list(leftColumns = 2)
  )
)

### Step 9: Export interactive table ----
dt <- datatable(
  merged_dataset,
  extensions = 'FixedColumns',
  options = list(
    scrollX = TRUE,
    fixedColumns = list(leftColumns = 2)
  )
)

library(htmltools)

save_html(dt, file = "_tables/2_correlation_table_interactive.html")



# --------------------------------------------------------------------------------
## Printable correlation table with descriptives (numeric and factor) ----
# Resort columns
ds_corr <- ds_corr[, c(
  "COVID_CTB_dummy",
  "misinfo_covid_beliefs_dummy",
  "COVID_CTB",
  "misinfo_covid_beliefs",
  "EpistBel_Truth_index",
  "EpistBel_IT_index",
  "EpistBel_AOT_index",
  "ConspMent_index",
  "SDO_index",
  "polorientation",
  "religiousity",
  "MediaUse_traditional_index",
  "MediaUse_online_index",
  "MediaUse_socialMedia_index",
  "AltMed_index",
  "TrustAltMed_index",
  "TrustMedia_index",
  "TrustScience",
  "TrustPolitical_index",
  "gender_male",
  "age",
  "edu_high"
)]
### Step 1: Compute correlation matrix and delete lower triangle ----
cor_results <- Hmisc::rcorr(as.matrix(ds_corr %>% dplyr::select(where(is.numeric))))
cor_matrix <- cor_results$r
p_values <- cor_results$P
cor_matrix <- round(cor_matrix, 2)
cor_matrix <- ifelse(p_values < 0.001, paste0(cor_matrix, "***"), cor_matrix)
cor_matrix <- ifelse(p_values >= 0.001 & p_values < 0.01, paste0(cor_matrix, "** "), cor_matrix)
cor_matrix <- ifelse(p_values >= 0.01 & p_values < 0.05, paste0(cor_matrix, "*  "), cor_matrix)
cor_matrix <- ifelse(p_values >= 0.05, paste0(cor_matrix, "   "), cor_matrix)
diag(cor_matrix) <- ""
cor_matrix[lower.tri(cor_matrix, diag = TRUE)] <- ""
diag(cor_matrix) <- ""  # Remove diagonal elements if necessary
cor_table <- as.data.frame(cor_matrix)
view(cor_table)

### Step 2: Combine Correlation Matrix with Descriptive Statistics and Percentages ----
cor_table$variable <- rownames(cor_table)
rownames(cor_table) <- NULL  # Remove row names
variable_index <- which(names(cor_table) == "variable")
cor_table <- cor_table[, c(variable_index, setdiff(1:ncol(cor_table), variable_index))]
view(cor_table)

merged_dataset <- merge(cor_table, merged_data, by = "variable", all.x = TRUE)

# Get the order of rows in cor_table
order <- match(cor_table$variable, merged_dataset$variable)

# Reorder merged_dataset based on the order of cor_table
merged_dataset <- merged_dataset[order, ]

# Reorder columns 
desired_order <- c("variable", "mean", "sd", "min", "max", "% of the sample", setdiff(names(merged_dataset), c("variable", "mean", "sd", "min", "max", "% of the sample")))
merged_dataset <- merged_dataset[, desired_order]

# Delete first column
merged_dataset <- merged_dataset %>%
  dplyr::select(-COVID_CTB_dummy)
view(merged_dataset)

### Step 3: Rename columns and rows ----
merged_dataset <- merged_dataset %>%
  rename(
    "COVID Misinformation Belief (continuous)" = misinfo_covid_beliefs,
    "COVID Misinformation Belief (binary)" = misinfo_covid_beliefs_dummy,
    "Male" = gender_male,
    "Age" = age,
    "High Education" = edu_high,
    "Political Orientation" = polorientation,
    "Trust Media" = TrustMedia_index,
    "Trust Alternative Media" = TrustAltMed_index,
    "Trust in Science" = TrustScience,
    "Religiosity" = religiousity,
    "Conspiracy Mentality" = ConspMent_index,
    "Truth is political" = EpistBel_Truth_index,
    "Intuitive Thinking" = EpistBel_IT_index,
    "Actively Open-Minded Thinking" = EpistBel_AOT_index,
    "Social Dominance Orientation" = SDO_index,
    "Political Trust" = TrustPolitical_index,
    "Traditional Media Use" = MediaUse_traditional_index,
    "Online Media Use" = MediaUse_online_index,
    "Social Media Use" = MediaUse_socialMedia_index,
    "Alternative Media Use" = AltMed_index
  )

merged_dataset$variable <- gsub("COVID_CTB_dummy", "COVID Conspiracy Belief (binary)", merged_dataset$variable)
merged_dataset$variable <- gsub("COVID_CTB", "COVID Conspiracy Belief (continuous)", merged_dataset$variable)
merged_dataset$variable <- gsub("misinfo_covid_beliefs_dummy", "COVID Misinformation Belief (binary)", merged_dataset$variable)
merged_dataset$variable <- gsub("misinfo_covid_beliefs", "COVID Misinformation Belief (continuous)", merged_dataset$variable)
merged_dataset$variable <- gsub("gender_male", "Male", merged_dataset$variable)
merged_dataset$variable <- gsub("age", "Age", merged_dataset$variable)
merged_dataset$variable <- gsub("edu_high", "High Education", merged_dataset$variable)
merged_dataset$variable <- gsub("polorientation", "Political Orientation", merged_dataset$variable)
merged_dataset$variable <- gsub("TrustMedia_index", "Trust Media", merged_dataset$variable)
merged_dataset$variable <- gsub("TrustAltMed_index", "Trust Alternative Media", merged_dataset$variable)
merged_dataset$variable <- gsub("TrustScience", "Trust in Science", merged_dataset$variable)
merged_dataset$variable <- gsub("religiousity", "Religiosity", merged_dataset$variable)
merged_dataset$variable <- gsub("ConspMent_index", "Conspiracy Mentality", merged_dataset$variable)
merged_dataset$variable <- gsub("EpistBel_Truth_index", "Truth is political", merged_dataset$variable)
merged_dataset$variable <- gsub("EpistBel_IT_index", "Intuitive Thinking", merged_dataset$variable)
merged_dataset$variable <- gsub("EpistBel_AOT_index", "Actively Open-Minded Thinking", merged_dataset$variable)
merged_dataset$variable <- gsub("SDO_index", "Social Dominance Orientation", merged_dataset$variable)
merged_dataset$variable <- gsub("TrustPolitical_index", "Political Trust", merged_dataset$variable)
merged_dataset$variable <- gsub("MediaUse_traditional_index", "Traditional Media Use", merged_dataset$variable)
merged_dataset$variable <- gsub("MediaUse_online_index", "Online Media Use", merged_dataset$variable)
merged_dataset$variable <- gsub("MediaUse_socialMedia_index", "Social Media Use", merged_dataset$variable)
merged_dataset$variable <- gsub("AltMed_index", "Alternative Media Use", merged_dataset$variable)
view(merged_dataset)

### Step 4: Display and export the final table ----
tinytable::tt(merged_dataset)
tinytable::tt(merged_dataset, theme = "void") |> tinytable::save_tt("_tables/2_correlation_table_printable.html", overwrite = TRUE)

## Word export for publication ----
library(officer)
library(flextable)

ft <- flextable(merged_dataset)
ft <- autofit(ft)
ft <- set_table_properties(ft, width = 1, layout = "autofit")
doc <- read_docx()
doc <- body_add_flextable(doc, value = ft)
print(doc, target = "_tables/2_correlation_table_printable.docx")

# --------------------------------------------------------------------------------
# ANTECEDENTS OF CTBs (binary): MULTIVARIATE LOGISTIC REGRESSION ----
# --------------------------------------------------------------------------------
## Build dataset ----
ds_analysis_paper2_CTBs_Ant<-dplyr::select(ds_MisInfoCT_analysis,
                                           COVID_CTB_dummy,misinfo_covid_beliefs_dummy,
                                           ConspMent_index,
                                           MediaUse_traditional_index,MediaUse_online_index,
                                           MediaUse_socialMedia_index,AltMed_index,
                                           EpistBel_IT_index,EpistBel_Truth_index,EpistBel_AOT_index,
                                           SDO_index,
                                           TrustMedia_index,TrustAltMed_index,TrustScience,TrustPolitical_index,
                                           gender_male,age,edu_high,religiousity,polorientation
) 

str(ds_analysis_paper2_CTBs_Ant)

# Assign labels to each variable
ds_analysis_paper2_CTBs_Ant <- ds_analysis_paper2_CTBs_Ant %>%
  mutate(COVID_CTB_dummy = set_label(COVID_CTB_dummy, "COVID Conspiracy Theory Belief (binary)"),
         misinfo_covid_beliefs_dummy = set_label(misinfo_covid_beliefs_dummy, "Misinformation COVID Beliefs (binary)"),
         ConspMent_index = set_label(ConspMent_index, "Conspiracy Mentality"),
         MediaUse_traditional_index = set_label(MediaUse_traditional_index, "Traditional Media Use"),
         MediaUse_online_index = set_label(MediaUse_online_index, "Online Media Use"),
         MediaUse_socialMedia_index = set_label(MediaUse_socialMedia_index, "Social Media Use"),
         AltMed_index = set_label(AltMed_index, "Alternative Media Use"),
         EpistBel_IT_index = set_label(EpistBel_IT_index, "Intuitive Thinking"),
         EpistBel_Truth_index = set_label(EpistBel_Truth_index, "Truth is Political"),
         EpistBel_AOT_index = set_label(EpistBel_AOT_index, "Actively Open-Minded Thinking"),
         SDO_index = set_label(SDO_index, "Social Dominance Orientation"),
         TrustMedia_index = set_label(TrustMedia_index, "Trust in Media"),
         TrustAltMed_index = set_label(TrustAltMed_index, "Trust in Alternative Media"),
         TrustScience = set_label(TrustScience, "Trust in Science"),
         TrustPolitical_index = set_label(TrustPolitical_index, "Trust in Politics"),
         gender_male = set_label(gender_male, "Gender Male"),
         age = set_label(age, "Age"),
         edu_high = set_label(edu_high, "Education High"),
         religiousity = set_label(religiousity, "Religiosity"),
         polorientation = set_label(polorientation, "Political Orientation"))


# --------------------------------------------------------------------------------
## Fit the initial logistic regression model ----
model <- glm(COVID_CTB_dummy ~ ConspMent_index +
               MediaUse_traditional_index + MediaUse_online_index +
               MediaUse_socialMedia_index + AltMed_index +
               EpistBel_IT_index + EpistBel_Truth_index + EpistBel_AOT_index +
               SDO_index +
               TrustMedia_index + TrustAltMed_index + TrustScience + TrustPolitical_index +
               gender_male + age + edu_high + religiousity + polorientation, 
             data = ds_analysis_paper2_CTBs_Ant, family = binomial)
summary(model)
### Assumption 1: The outcome is a binary or dichotomous variable ----
descr::freq(ds_analysis_paper2_CTBs_Ant$misinfo_covid_beliefs_dummy)
typeof(ds_analysis_paper2_CTBs_Ant$misinfo_covid_beliefs_dummy)

descr::freq(ds_analysis_paper2_CTBs_Ant$COVID_CTB_dummy)
typeof(ds_analysis_paper2_CTBs_Ant$COVID_CTB_dummy)
### Result: fulfilled

### Assumption 2: There is a linear relationship between the logit of the outcome and each predictor variables. -----
#### Visually inspecting the scatter plot between each predictor and the logit values
##### Predict the probability (p) of CTB positivity
probabilities <- predict(model, type = "response")
predicted.classes <- ifelse(probabilities > 0.5, "pos", "neg")
head(predicted.classes)
##### Select only numeric predictors
mydata <- ds_analysis_paper2_CTBs_Ant %>%
  dplyr::select_if(is.numeric) 
predictors <- colnames(mydata)
##### Bind the logit and tidying the data for plot
mydata <- mydata %>%
  mutate(logit = log(probabilities/(1-probabilities))) %>%
  gather(key = "predictors", value = "predictor.value", -logit)

##### create the scatterplots
ggplot(mydata, aes(logit, predictor.value))+
  geom_point(size = 0.5, alpha = 0.5) +
  geom_smooth(method = "loess") + 
  theme_bw() + 
  facet_wrap(~predictors, scales = "free_y")

### Result: Sufficiently linear relationship for all predictors except age
#### If the scatter plot shows non-linearity, you need other methods to build the 
#### model such as including 2 or 3-power terms, fractional polynomials and spline function.

### Natural spline transformation for age ----
model_spline <- glm(COVID_CTB_dummy ~ ConspMent_index +
                      MediaUse_traditional_index + MediaUse_online_index +
                      MediaUse_socialMedia_index + AltMed_index +
                      EpistBel_IT_index + EpistBel_Truth_index + EpistBel_AOT_index +
                      SDO_index +
                      TrustMedia_index + TrustScience + TrustPolitical_index +
                      gender_male + ns(age, df = 4) + edu_high + religiousity + polorientation, 
                    data = ds_analysis_paper2_CTBs_Ant, family = binomial)
summary(model_spline)

model_nonspline <- glm(COVID_CTB_dummy ~ ConspMent_index +
                         MediaUse_traditional_index + MediaUse_online_index +
                         MediaUse_socialMedia_index + AltMed_index +
                         EpistBel_IT_index + EpistBel_Truth_index + EpistBel_AOT_index +
                         SDO_index +
                         TrustMedia_index + TrustScience + TrustPolitical_index +
                         gender_male + age + edu_high + religiousity + polorientation, 
                       data = ds_analysis_paper2_CTBs_Ant, family = binomial)
summary(model_nonspline)


### Assumption 3: There is no influential values (extreme values or outliers) in the continuous predictors ----
#### Visualizing the Cook’s distance values
plot(model, which = 4, id.n = 3)
# To check whether the data contains potential influential observations, the standardized residual error can 
# be inspected. Data points with an absolute standardized residuals above 3 represent possible outliers and 
# may deserve closer attention.

# Extract model results
model.data <- broom::augment(model) %>% 
  mutate(index = 1:n()) 
model.data %>% top_n(3, .cooksd)
ggplot(model.data, aes(index, .std.resid)) + 
  geom_point(aes(color = COVID_CTB_dummy), alpha = .5) +
  theme_bw()
model.data %>% 
  filter(abs(.std.resid) > 3)

### Result: row 1400 influential outlier -> remove
ds_analysis_paper2_CTBs_Ant <- subset(ds_analysis_paper2_CTBs_Ant, rownames(ds_analysis_paper2_CTBs_Ant) != "1400")

### Assumption 4: There is no high intercorrelations (i.e. multicollinearity) among the predictors. ----
car::vif(model)
### Result: all values under 4 indicating no multicollinearity


# --------------------------------------------------------------------------------
## Fit the final logistic regression model ----
# Assign labels to each variable
ds_analysis_paper2_CTBs_Ant <- ds_analysis_paper2_CTBs_Ant %>%
  mutate(COVID_CTB_dummy = set_label(COVID_CTB_dummy, "COVID Conspiracy Theory Belief (binary)"),
         misinfo_covid_beliefs_dummy = set_label(misinfo_covid_beliefs_dummy, "Misinformation COVID Beliefs (binary)"),
         ConspMent_index = set_label(ConspMent_index, "Conspiracy Mentality"),
         MediaUse_traditional_index = set_label(MediaUse_traditional_index, "Traditional Media Use"),
         MediaUse_online_index = set_label(MediaUse_online_index, "Online Media Use"),
         MediaUse_socialMedia_index = set_label(MediaUse_socialMedia_index, "Social Media Use"),
         AltMed_index = set_label(AltMed_index, "Alternative Media Use"),
         EpistBel_IT_index = set_label(EpistBel_IT_index, "Intuitive Thinking"),
         EpistBel_Truth_index = set_label(EpistBel_Truth_index, "Truth is Political"),
         EpistBel_AOT_index = set_label(EpistBel_AOT_index, "Actively Open-Minded Thinking"),
         SDO_index = set_label(SDO_index, "Social Dominance Orientation"),
         TrustMedia_index = set_label(TrustMedia_index, "Trust in Media"),
         TrustAltMed_index = set_label(TrustAltMed_index, "Trust in Alternative Media"),
         TrustScience = set_label(TrustScience, "Trust in Science"),
         TrustPolitical_index = set_label(TrustPolitical_index, "Trust in Politics"),
         gender_male = set_label(gender_male, "Gender Male"),
         age = set_label(age, "Age"),
         edu_high = set_label(edu_high, "Education High"),
         religiousity = set_label(religiousity, "Religiosity"),
         polorientation = set_label(polorientation, "Political Orientation"))
str(ds_analysis_paper2_CTBs_Ant)



ds_analysis_paper2_CTBs_Ant$gender_male <- factor(ds_analysis_paper2_CTBs_Ant$gender_male,
                                                  levels = c("0", "1"),
                                                  labels = c("Female", "Male"))

ds_analysis_paper2_CTBs_Ant$edu_high <- factor(ds_analysis_paper2_CTBs_Ant$edu_high,
                                               levels = c("0", "1"),
                                               labels = c("Low Education", "High Education"))

# fit the model
model_CTB <- glm(COVID_CTB_dummy ~ ConspMent_index +
               EpistBel_Truth_index + EpistBel_IT_index + EpistBel_AOT_index +
               SDO_index + polorientation + religiousity +
               MediaUse_traditional_index + MediaUse_online_index +
               MediaUse_socialMedia_index + AltMed_index +
               TrustMedia_index + TrustAltMed_index + TrustScience + TrustPolitical_index +
               gender_male + age + edu_high,  
             data = ds_analysis_paper2_CTBs_Ant, family = binomial)
summary(model_CTB)

# Check the updated term labels
sjlabelled::get_term_labels(model_CTB)



### Standardize estimates  ----
model_CTB_stand<-arm::standardize(model_CTB, unchanged = NULL, 
                              standardize.y = FALSE, binary.inputs = "center")
summary(model_CTB_stand)

### Interpreting the model ----
#### Chi-Square and its significance following Field ----
modelChi <- model_CTB$null.deviance - model_CTB$deviance
modelChi
chidf <- model_CTB$df.null - model_CTB$df.residual
chidf
chisq.prob <- 1 - pchisq(modelChi, chidf)
chisq.prob
# Result: χ2(18) = 1571.41, p < .001

#### Pseudo-R-Square following Field using DescTools ----
DescTools::PseudoR2(model_CTB, which = c("CoxSnell","Nagelkerke","McFadden"))

### Get various fit statistics using blorr ----
blorr::blr_model_fit_stats(model_CTB) # report 

### Interpret coefficients of individual predictors ----
#### Estimates on the untransformed scale (log-odds coefficients, b, unstandardized) ----
summary(model_CTB)
# plot as table (transform = NULL)
sjPlot::tab_model(model_CTB, show.se=TRUE, transform = NULL, p.style = "stars", show.aic = TRUE, show.loglik=TRUE, show.r2=TRUE)
#### Interpretation: Increase of the log(odds) caused by a unit change of the predictor

#### Estimates on the transformed scale (log odds ratio coefficients, exponential of b) ----
oddrat <- exp(coef(model_CTB))
oddrat
# plot as table (transform = TRUE)
sjPlot::tab_model(model_CTB, show.se=TRUE, p.style = "stars")


# --------------------------------------------------------------------------------
# ANTECEDENTS OF Misinfo Belief (binary): MULTIVARIATE LOGISTIC REGRESSION ----
# --------------------------------------------------------------------------------
## Fit the initial logistic regression model ----
model_misinfo <- glm(misinfo_covid_beliefs_dummy ~ ConspMent_index +
                       MediaUse_traditional_index + MediaUse_online_index +
                       MediaUse_socialMedia_index + AltMed_index +
                       EpistBel_IT_index + EpistBel_Truth_index + EpistBel_AOT_index +
                       SDO_index +
                       TrustMedia_index + TrustAltMed_index + TrustScience + TrustPolitical_index +
                       gender_male + age + edu_high + religiousity + polorientation, 
                     data = ds_analysis_paper2_CTBs_Ant, family = binomial)
summary(model_misinfo)

## Check assumptions ----
### Assumption 1: The outcome is a binary or dichotomous variable ----
descr::freq(ds_analysis_paper2_CTBs_Ant$misinfo_covid_beliefs_dummy)
typeof(ds_analysis_paper2_CTBs_Ant$misinfo_covid_beliefs_dummy)

### Assumption 2: There is a linear relationship between the logit of the outcome and each predictor variables. -----
#### Visually inspecting the scatter plot between each predictor and the logit values
###### Predict the probability (p) of misinformation belief positivity
probabilities <- predict(model_misinfo, type = "response")
predicted.classes <- ifelse(probabilities > 0.5, "pos", "neg")
head(predicted.classes)
###### Select only numeric predictors
mydata <- ds_analysis_paper2_CTBs_Ant %>%
  dplyr::select_if(is.numeric)
predictors <- colnames(mydata)
###### Bind the logit and tidying the data for plot
mydata <- mydata %>%
  mutate(logit = log(probabilities / (1 - probabilities))) %>%
  gather(key = "predictors", value = "predictor.value", -logit)

###### Create the scatterplots
ggplot(mydata, aes(logit, predictor.value)) +
  geom_point(size = 0.5, alpha = 0.5) +
  geom_smooth(method = "loess") + 
  theme_bw() + 
  facet_wrap(~predictors, scales = "free_y")

### Result: Sufficiently linear relationship for all predictors except online and traditional media use
#### If the scatter plot shows non-linearity, you need other methods to build the 
#### model such as including 2 or 3-power terms, fractional polynomials and spline function.

### Natural spline transformation for online and traditional media use ----
model_misinfo_spline <- glm(misinfo_covid_beliefs_dummy ~ ConspMent_index +
                              ns(MediaUse_traditional_index, df = 4) + ns(MediaUse_online_index, df = 4) +
                              MediaUse_socialMedia_index + AltMed_index +
                              EpistBel_IT_index + EpistBel_Truth_index + EpistBel_AOT_index +
                              SDO_index +
                              TrustMedia_index + TrustAltMed_index + TrustScience + TrustPolitical_index +
                              gender_male + age + edu_high + religiousity + polorientation, 
                            data = ds_analysis_paper2_CTBs_Ant, family = binomial)
summary(model_misinfo_spline)

### Result: no significant influences on all levels for online and traditional


### Assumption 3: There is no influential values (extreme values or outliers) in the continuous predictors ----
#### Visualizing the Cook’s distance values
plot(model_misinfo, which = 4, id.n = 3)
# To check whether the data contains potential influential observations, the standardized residual error can 
# be inspected. Data points with an absolute standardized residuals above 3 represent possible outliers and 
# may deserve closer attention.

# Extract model results
model.data <- broom::augment(model_misinfo) %>% 
  mutate(index = 1:n()) 
model.data %>% top_n(3, .cooksd)
ggplot(model.data, aes(index, .std.resid)) + 
  geom_point(aes(color = misinfo_covid_beliefs_dummy), alpha = .5) +
  theme_bw()
model.data %>% 
  filter(abs(.std.resid) > 3)

### Result: no influential outliers

### Assumption 4: There is no high intercorrelations (i.e. multicollinearity) among the predictors. ----
car::vif(model_misinfo)
### Result: all values under 4 indicating no multicollinearity

# --------------------------------------------------------------------------------
## Fit the final logistic regression model ----
# Fit the model
model_misinfo <- glm(misinfo_covid_beliefs_dummy ~ ConspMent_index +
                       EpistBel_Truth_index + EpistBel_IT_index + EpistBel_AOT_index +
                       SDO_index + polorientation + religiousity +
                       MediaUse_traditional_index + MediaUse_online_index +
                       MediaUse_socialMedia_index + AltMed_index +
                       TrustMedia_index + TrustAltMed_index + TrustScience + TrustPolitical_index +
                       gender_male + age + edu_high , 
                     data = ds_analysis_paper2_CTBs_Ant, family = binomial)
summary(model_misinfo)

# Check the updated term labels
sjlabelled::get_term_labels(model_misinfo)

### Standardize estimates ----
model_misinfo_stand <- arm::standardize(model_misinfo, unchanged = NULL, 
                                        standardize.y = FALSE, binary.inputs = "center")
summary(model_misinfo_stand)

### Interpreting the model ----
#### Chi-Square and its significance following Field ----
modelChi <- model_misinfo$null.deviance - model_misinfo$deviance
modelChi
chidf <- model_misinfo$df.null - model_misinfo$df.residual
chidf
chisq.prob <- 1 - pchisq(modelChi, chidf)
chisq.prob
# Result: χ2(18) = 1224,20, p < .001

#### Pseudo-R-Square following Field using DescTools ----
DescTools::PseudoR2(model_misinfo, which = c("CoxSnell", "Nagelkerke", "McFadden"))

### Get various fit statistics using blorr ----
blorr::blr_model_fit_stats(model_misinfo)  

### Interpret coefficients of individual predictors ----
#### Estimates on the untransformed scale (log-odds coefficients, b, unstandardized) ----
summary(model_misinfo)
# plot as table (transform = NULL)
sjPlot::tab_model(model_misinfo, show.se=TRUE, transform = NULL, p.style = "stars", show.aic = TRUE, show.loglik=TRUE, show.r2=TRUE)
#### Interpretation: Increase of the log(odds) caused by a unit change of the predictor
#### Reporting in text: b = 1.23, z = 3.07, p < .002. Wald Test for testing significance

#### Estimates on the transformed scale (log odds ratio coefficients, exponential of b) ----
oddrat <- exp(coef(model_misinfo))
oddrat
# plot as table (transform = TRUE)
sjPlot::tab_model(model_misinfo, show.se=TRUE, p.style = "stars")

# --------------------------------------------------------------------------------
# ANTECEDENTS OF MISINFO AND CT Belief (binary): OUTPUT AND RESULTS ----
# --------------------------------------------------------------------------------
## Export detailed table CTB logistic regression----
# Load required packages
library(dplyr)
library(lmtest)
library(sandwich)
library(DescTools)
library(kableExtra)
library(flextable)
library(officer)
library(parameters)

### Compute Robust standard errors (HC3) ----
cov.robust <- sandwich::vcovHC(model_CTB, type = "HC3")
robust_summary <- lmtest::coeftest(model_CTB, vcov = cov.robust)
coef_table <- data.frame(
  Predictor = rownames(robust_summary),
  Estimate = robust_summary[, 1],
  Std_Error = robust_summary[, 2],
  z_value = robust_summary[, 3],
  p_value = robust_summary[, 4],
  row.names = NULL,
  stringsAsFactors = FALSE
)

### Compute Odds ratios and robust confidence intervals ----
ci_robust <- coefci(model_CTB, vcov = cov.robust)
OR <- exp(coef(model_CTB))
CI_low <- exp(ci_robust[, 1])
CI_high <- exp(ci_robust[, 2])

results <- data.frame(
  Predictor = rownames(coef_table),
  Log_Odds = coef_table$Estimate,
  Std_Error = coef_table$Std_Error,
  p_raw = coef_table$p_value,
  Odds_Ratio = OR,
  CI_low = CI_low,
  CI_high = CI_high
)

### Compute Bonferroni correction and add significance stars ----
results$p_adj <- p.adjust(results$p_raw, method = "bonferroni")

results$stars <- cut(
  results$p_adj,
  breaks = c(-Inf, 0.001, 0.01, 0.05, Inf),
  labels = c("***", "**", "*", "")
)

### Fix names issue ----
results <- results %>%
  rename(Predictor_old = Predictor) %>%  # rename existing column
  rownames_to_column(var = "Predictor")  # add rownames as Predictor
results <- results %>% dplyr::select(-Predictor_old)

### Add y-standardized coefficients ----
std_par <- standardize_parameters(
  model_CTB,
  method = "sdy",
  ci = 0.95,
  robust = TRUE
)

std_df <- as.data.frame(std_par)
y_std <- setNames(std_df$Std_Coefficient, std_df$Parameter)

results$Y_std <- y_std[results$Predictor]
results$Y_std_fmt <- sprintf("%.2f", results$Y_std)

### Format columns ----
results <- results %>%
  mutate(
    Log_Odds_fmt = sprintf("%.2f %s", Log_Odds, stars),
    Std_Error_fmt = sprintf("%.2f", Std_Error),
    Odds_Ratio_fmt = sprintf("%.2f %s", Odds_Ratio, stars),
    CI_fmt = sprintf("%.2f – %.2f", CI_low, CI_high)
  )

### Create main results table ----
table_out <- results %>%
  dplyr::select(Predictor,
         Log_Odds_fmt,
         Std_Error_fmt,
         Odds_Ratio_fmt,
         CI_fmt,
         Y_std_fmt)

colnames(table_out) <- c("Predictor", "Log-Odds", "Std. Error", "Odds Ratios", "95% CI", "Y-Std Coef")

### Compute model fit statistics ----
# Model chi-square
modelChi <- model_CTB$null.deviance - model_CTB$deviance
chidf <- model_CTB$df.null - model_CTB$df.residual
chisq.prob <- 1 - pchisq(modelChi, chidf)

# Pseudo-R²
r2s <- DescTools::PseudoR2(model_CTB, which = c("Nagelkerke"))

# Fit indices summary
fit_stats <- data.frame(
  Predictor = c("Observations", "AIC", "log-Likelihood", "χ²(df)", "p (Model)", "R² (Nagelkerke)"),
  `Log-Odds` = c(
    nobs(model_CTB),
    round(AIC(model_CTB), 3),
    round(logLik(model_CTB)[1], 3),
    paste0(round(modelChi, 2), " (", chidf, ")"),
    ifelse(chisq.prob < .001, "< .001", round(chisq.prob, 3)),
    round(r2s, 2)
  ),
  `Std. Error` = NA,
  `Odds Ratios` = NA,
  `95% CI` = NA,
  `Y-Std Coef` = NA  
)
fit_stats
# Combine coefficient and model info tables
colnames(fit_stats) <- colnames(table_out)
final_table <- dplyr::bind_rows(table_out, fit_stats)

final_table <- bind_rows(table_out, fit_stats)
final_table <- final_table[, c("Predictor", "Log-Odds", "Std. Error", "Y-Std Coef", "Odds Ratios", "95% CI")]
final_table
### Export to Word or HTML ----
final_table <- final_table %>%
  mutate(Predictor = dplyr::recode(Predictor,
                            "ConspMent_index" = "Conspiracy Mentality",
                            "MediaUse_traditional_index" = "Traditional Media Use",
                            "MediaUse_online_index" = "Online Media Use",
                            "MediaUse_socialMedia_index" = "Social Media Use",
                            "AltMed_index" = "Alternative Media Use",
                            "EpistBel_IT_index" = "Intuitive Thinking",
                            "EpistBel_Truth_index" = "Truth is Political",
                            "EpistBel_AOT_index" = "Actively Open-Minded Thinking",
                            "SDO_index" = "Social Dominance Orientation",
                            "TrustMedia_index" = "Trust in Media",
                            "TrustAltMed_index" = "Trust in Alternative Media",
                            "TrustScience" = "Trust in Science",
                            "TrustPolitical_index" = "Trust in Politics",
                            "gender_maleMale" = "Gender Male",
                            "age" = "Age",
                            "edu_highHigh Education" = "Education High",
                            "religiousity" = "Religiosity",
                            "polorientation" = "Political Orientation"
  ))

# HTML
library(kableExtra)

kbl(final_table, format = "html", 
    caption = "Binary logistic regression predicting belief in conspiracy theories") %>%
  kable_styling(full_width = FALSE, bootstrap_options = c("striped", "hover")) %>%
  add_footnote("Note: * p<0.05; ** p<0.01; *** p<0.001 (Bonferroni-adjusted to account for multiple comparisons). Heteroscedasticity-consistent standard errors using the sandwich-package in R (Zeileis et al., 2020).")


# Word
ft <- flextable(final_table)
# Set font
ft <- font(ft, fontname = "Calibri", part = "all")
# autofit on page
ft <- autofit(ft)
# Add header
ft <- add_header_lines(
  ft,
  values = "Table A1. Binary logistic regression predicting belief in conspiracy theories"
)

# Format the header 
ft <- compose(
  ft, 
  i = 1, j = 1, part = "header",
  value = as_paragraph(
    as_chunk("Table A1", props = fp_text(italic = TRUE, bold = TRUE, font.family = "Calibri")),
    as_chunk(". Binary logistic regression predicting belief in conspiracy theories", props = fp_text(italic = TRUE, font.family = "Calibri"))
  )
)

# Remove top border
ft <- border_remove(ft)
ft <- hline(ft, border = fp_border(color="black", width = 1), part = "header")

# Find the row where Predictor == "Observations"
obs_row <- which(final_table$Predictor == "Observations")

# Add a horizontal line above that row
ft <- hline(ft, i = obs_row - 1, border = fp_border(color="black", width = 1))

ft <- hline_bottom(ft, border = fp_border(color = "black", width = 1))

# Add footer
ft <- add_footer_lines(
  ft,
  values = "Note: * p<0.05; ** p<0.01; *** p<0.001 (Bonferroni-adjusted to account for multiple comparisons). Heteroscedasticity-consistent standard errors using the sandwich-package in R (Zeileis et al., 2020)."
)
ft <- font(ft, fontname = "Calibri", part = "footer")


# Save as Word
save_as_docx(ft, path = "_tables/2_glm_CTBs_final.docx")



# --------------------------------------------------------------------------------
## Export detailed table MBs logistic regression----
### Compute Robust standard errors (HC3) ----
cov.robust <- sandwich::vcovHC(model_misinfo, type = "HC3")
robust_summary <- lmtest::coeftest(model_misinfo, vcov = cov.robust)
coef_table <- data.frame(
  Predictor = rownames(robust_summary),
  Estimate = robust_summary[, 1],
  Std_Error = robust_summary[, 2],
  z_value = robust_summary[, 3],
  p_value = robust_summary[, 4],
  row.names = NULL,
  stringsAsFactors = FALSE
)

### Compute Odds ratios and robust confidence intervals ----
ci_robust <- coefci(model_misinfo, vcov = cov.robust)
OR <- exp(coef(model_misinfo))
CI_low <- exp(ci_robust[, 1])
CI_high <- exp(ci_robust[, 2])

results <- data.frame(
  Predictor = rownames(coef_table),
  Log_Odds = coef_table$Estimate,
  Std_Error = coef_table$Std_Error,
  p_raw = coef_table$p_value,
  Odds_Ratio = OR,
  CI_low = CI_low,
  CI_high = CI_high
)

### Bonferroni correction and significance stars ----
results$p_adj <- p.adjust(results$p_raw, method = "bonferroni")
results$stars <- cut(
  results$p_adj,
  breaks = c(-Inf, 0.001, 0.01, 0.05, Inf),
  labels = c("***", "**", "*", "")
)

### Fix names issue ----
results <- results %>%
  rename(Predictor_old = Predictor) %>%
  rownames_to_column(var = "Predictor") %>%
  dplyr::select(-Predictor_old)

### Add y-standardized coefficients ----
std_par <- standardize_parameters(
  model_misinfo,
  method = "sdy",
  ci = 0.95,
  robust = TRUE
)
std_df <- as.data.frame(std_par)
y_std <- setNames(std_df$Std_Coefficient, std_df$Parameter)
results$Y_std <- y_std[results$Predictor]
results$Y_std_fmt <- sprintf("%.2f", results$Y_std)

### Format columns ----
results <- results %>%
  mutate(
    Log_Odds_fmt = sprintf("%.2f %s", Log_Odds, stars),
    Std_Error_fmt = sprintf("%.2f", Std_Error),
    Odds_Ratio_fmt = sprintf("%.2f %s", Odds_Ratio, stars),
    CI_fmt = sprintf("%.2f – %.2f", CI_low, CI_high)
  )

### Create main results table ----
table_out <- results %>%
  dplyr::select(Predictor, Log_Odds_fmt, Std_Error_fmt, Odds_Ratio_fmt, CI_fmt, Y_std_fmt)
colnames(table_out) <- c("Predictor", "Log-Odds", "Std. Error", "Odds Ratios", "95% CI", "Y-Std Coef")

### Compute model fit statistics ----
modelChi <- model_misinfo$null.deviance - model_misinfo$deviance
chidf <- model_misinfo$df.null - model_misinfo$df.residual
chisq.prob <- 1 - pchisq(modelChi, chidf)
r2s <- DescTools::PseudoR2(model_misinfo, which = c("Nagelkerke"))

fit_stats <- data.frame(
  Predictor = c("Observations", "AIC", "log-Likelihood", "χ²(df)", "p (Model)", "R² (Nagelkerke)"),
  `Log-Odds` = c(
    nobs(model_misinfo),
    round(AIC(model_misinfo), 3),
    round(logLik(model_misinfo)[1], 3),
    paste0(round(modelChi, 2), " (", chidf, ")"),
    ifelse(chisq.prob < .001, "< .001", round(chisq.prob, 3)),
    round(r2s, 2)
  ),
  `Std. Error` = NA,
  `Odds Ratios` = NA,
  `95% CI` = NA,
  `Y-Std Coef` = NA
)
# Combine coefficient and model info tables
colnames(fit_stats) <- colnames(table_out)
final_table <- dplyr::bind_rows(table_out, fit_stats)

final_table <- bind_rows(table_out, fit_stats)
final_table <- final_table[, c("Predictor", "Log-Odds", "Std. Error", "Y-Std Coef", "Odds Ratios", "95% CI")]
final_table

### Recode predictors ----
final_table <- final_table %>%
  mutate(Predictor = dplyr::recode(Predictor,
                                   "ConspMent_index" = "Conspiracy Mentality",
                                   "MediaUse_traditional_index" = "Traditional Media Use",
                                   "MediaUse_online_index" = "Online Media Use",
                                   "MediaUse_socialMedia_index" = "Social Media Use",
                                   "AltMed_index" = "Alternative Media Use",
                                   "EpistBel_IT_index" = "Intuitive Thinking",
                                   "EpistBel_Truth_index" = "Truth is Political",
                                   "EpistBel_AOT_index" = "Actively Open-Minded Thinking",
                                   "SDO_index" = "Social Dominance Orientation",
                                   "TrustMedia_index" = "Trust in Media",
                                   "TrustAltMed_index" = "Trust in Alternative Media",
                                   "TrustScience" = "Trust in Science",
                                   "TrustPolitical_index" = "Trust in Politics",
                                   "gender_maleMale" = "Gender Male",
                                   "age" = "Age",
                                   "edu_highHigh Education" = "Education High",
                                   "religiousity" = "Religiosity",
                                   "polorientation" = "Political Orientation"
  ))

### HTML export ----
kbl(final_table, format = "html",
    caption = "Binary logistic regression predicting belief in misinformation") %>%
  kable_styling(full_width = FALSE, bootstrap_options = c("striped", "hover")) %>%
  add_footnote("Note: * p<0.05; ** p<0.01; *** p<0.001 (Bonferroni-adjusted). Robust SEs via sandwich (Zeileis et al., 2020).")

### Word export ----
ft <- flextable(final_table)
ft <- font(ft, fontname = "Calibri", part = "all")
ft <- autofit(ft)
ft <- add_header_lines(ft, values = "Table A2. Binary logistic regression predicting belief in misinformation")
ft <- border_remove(ft)
ft <- hline(ft, border = fp_border(color="black", width = 1), part = "header")

obs_row <- which(final_table$Predictor == "Observations")
ft <- hline(ft, i = obs_row - 1, border = fp_border(color="black", width = 1))
ft <- hline_bottom(ft, border = fp_border(color = "black", width = 1))
ft <- add_footer_lines(ft, values = "Note: * p<0.05; ** p<0.01; *** p<0.001 (Bonferroni-adjusted). Robust SEs via sandwich (Zeileis et al., 2020).")
ft <- font(ft, fontname = "Calibri", part = "footer")

save_as_docx(ft, path = "_tables/2_glm_misinfo_final.docx")

# --------------------------------------------------------------------------------
## Plot effect sizes in one graph ----
### Split according to predictor categories ----
# Block 1: Motives 
p1 <- plot_models(model_CTB, model_misinfo,
                  rm.terms = c("MediaUse_traditional_index","MediaUse_online_index","MediaUse_socialMedia_index","AltMed_index",
                               "TrustMedia_index","TrustAltMed_index","TrustScience","TrustPolitical_index",
                               "gender_maleMale","age","edu_highHigh Education"),
                  std.est="std",
                  show.values = TRUE,
                  show.legend = FALSE,
                  axis.title = "",
                  value.size = 4,
                  spacing = 0.9,
                  vline.color = "black") +
  ggtitle("Cognitive, intergroup-, and personality-related predictors") +
  theme(
    plot.title = element_text(size = 10)  
  )
p1

# Block 2: Media Use
p2 <- plot_models(model_CTB, model_misinfo,
                  rm.terms = c("ConspMent_index","EpistBel_Truth_index","EpistBel_IT_index","EpistBel_AOT_index",
                               "SDO_index","polorientation","religiousity",
                               "TrustMedia_index","TrustAltMed_index","TrustScience","TrustPolitical_index",
                               "gender_maleMale","age","edu_highHigh Education"),
                  std.est="std",
                  show.values = TRUE,
                  show.legend = FALSE,
                  axis.title = "",
                  value.size = 4,
                  spacing = 0.8,
                  vline.color = "black") +
  ggtitle("Media Use") +
  theme(
    plot.title = element_text(size = 10)  
    )
p2

# Block 3: Controls
p3 <- plot_models(model_CTB, model_misinfo,
                  rm.terms = c(
                    "ConspMent_index","EpistBel_Truth_index","EpistBel_IT_index","EpistBel_AOT_index",
                    "SDO_index","polorientation","religiousity",
                    "MediaUse_traditional_index","MediaUse_online_index","MediaUse_socialMedia_index","AltMed_index"
                  ),
                  std.est="std",
                  show.values = TRUE,
                  show.legend = FALSE,
                  axis.lim = c(0.3, 4),  
                  value.size = 4,
                  spacing = 0.8,
                  vline.color = "black") +
  ggtitle("Controls") +
  theme(
    plot.title = element_text(size = 10)
  )

p3


legend <- cowplot::get_legend(
  plot_models(model_CTB, model_misinfo, transform = "exp") +
    theme(legend.position = "bottom") # nur einmal die Legende
)

### Combine in one plot ----
plot_oddsratio_combined <- plot_grid(
  plot_grid(
    p1, p2, p3,
    ncol = 1,
    align = "v",
    rel_heights = c(2, 1.5, 1.85)  
  ),
  legend,
  ncol = 1,
  rel_heights = c(1, 0.05)
)


plot_oddsratio_combined

### Export plot ----
ggsave(
  filename = "_figures/2_glm_MisInfo_CTB_predictors_oddsratio.jpg",
  plot = plot_oddsratio_combined,
  width = 11,        
  height = 17,       
  dpi = 300         
)


# --------------------------------------------------------------------------------
## Plot descriptives of significant motives in one graph ----
### Create density plots for predictors of CTBs ----
descriptives_CTB<-dplyr::select(ds_analysis_paper2_CTBs_Ant,
                                COVID_CTB_dummy,
                                ConspMent_index,
                                EpistBel_Truth_index,EpistBel_IT_index,
                                polorientation,religiousity, 
                                SDO_index,
                                AltMed_index,
                                TrustScience, TrustPolitical_index, TrustMedia_index, TrustAltMed_index,
                                age, edu_high
) 

str(descriptives_CTB)

# Assign labels to each variable and factor level
descriptives_CTB <- descriptives_CTB %>%
  mutate(COVID_CTB_dummy = set_label(COVID_CTB_dummy, "COVID Conspiracy Theory Belief (binary)"),
         ConspMent_index = set_label(ConspMent_index, "Conspiracy Mentality"),
         EpistBel_IT_index = set_label(EpistBel_IT_index, "Intuitive Thinking"),
         EpistBel_Truth_index = set_label(EpistBel_Truth_index, "Truth is Political"),
         polorientation = set_label(polorientation, "Political Orientation"),
         religiousity = set_label(religiousity, "Religiosity"),
         SDO_index = set_label(SDO_index, "Social Dominance Orientation"),
         AltMed_index = set_label(AltMed_index, "Alternative Media Use"),
         TrustScience = set_label(TrustScience, "Trust in Science"),
         TrustMedia_index = set_label(TrustMedia_index, "Trust in Media"),
         TrustAltMed_index = set_label(TrustAltMed_index, "Trust in Alternative Media"),
         TrustPolitical_index = set_label(TrustPolitical_index, "Trust in Politics"))
str(descriptives_CTB)

descriptives_CTB$COVID_CTB_dummy <- factor(descriptives_CTB$COVID_CTB_dummy,
                                           levels = c(0, 1),
                                           labels = c("CT-Non-Believers", "CT-Believers"))
freq(descriptives_CTB$COVID_CTB_dummy)

# Data split into two groups
data_0 <- descriptives_CTB %>% filter(COVID_CTB_dummy == 0)
data_1 <- descriptives_CTB %>% filter(COVID_CTB_dummy == 1)

#### Conspiracy Mentality ----
ggplot(descriptives_CTB, aes(x = ConspMent_index, y = COVID_CTB_dummy)) + 
  geom_density_ridges(scale = 0.8)+
  stat_density_ridges(quantile_lines = TRUE, quantiles = 2) +
  labs(x = "Conspiracy Mentality", y = NULL)

# Add colors
ggplot(descriptives_CTB, aes(x = ConspMent_index, y = as.factor(COVID_CTB_dummy), fill = as.factor(COVID_CTB_dummy))) + 
  geom_density_ridges(scale = 0.8) +
  stat_density_ridges(quantile_lines = TRUE, quantiles = 2) +
  scale_fill_brewer(palette = "Set2", name = "COVID CTB") +
  labs(x = "Conspiracy Mentality", y = NULL) +
  theme_minimal() +
  theme(legend.position = "none")  # Remove the legend if not needed

pA <- ggplot(descriptives_CTB, aes(x = ConspMent_index, y = as.factor(COVID_CTB_dummy), fill = as.factor(COVID_CTB_dummy))) + 
  geom_density_ridges(scale = 0.8) +
  stat_density_ridges(quantile_lines = TRUE, quantiles = 2) +
  scale_fill_brewer(palette = "Set2", name = "COVID CTB") +
  labs(x = "Conspiracy Mentality", y = NULL) +
  theme_minimal() +
  theme(legend.position = "none")

# Change vertical line to mean instead of median
pA <- ggplot(descriptives_CTB, aes(x = ConspMent_index, y = as.factor(COVID_CTB_dummy), fill = as.factor(COVID_CTB_dummy))) + 
  geom_density_ridges(scale = 0.8) +
  stat_density_ridges(quantile_lines = TRUE, quantile_fun = mean) +
  scale_fill_brewer(palette = "Set2", name = "COVID CTB") +
  labs(x = "Conspiracy Mentality", y = NULL) +
  theme_minimal() +
  theme(legend.position = "none")
pA

# Add label for the mean
# Calculate means for each group
mean_values_CM_CTB <- descriptives_CTB %>%
  group_by(COVID_CTB_dummy) %>%
  summarise(mean_value = mean(ConspMent_index))

# Create the plot
pA <- ggplot(descriptives_CTB, aes(x = ConspMent_index, y = as.factor(COVID_CTB_dummy), fill = as.factor(COVID_CTB_dummy))) + 
  geom_density_ridges(scale = 0.8) +
  stat_density_ridges(quantile_lines = TRUE, quantile_fun = mean) +
  scale_fill_brewer(palette = "Set2", name = "COVID CTB") +
  labs(x = "Conspiracy Mentality", y = NULL) +
  theme_minimal() +
  theme(legend.position = "none")

# Add text labels for mean values
pA <- pA +
  geom_text(data = mean_values_CM_CTB, aes(x = mean_value, y = as.factor(COVID_CTB_dummy), 
                                    label = paste("", round(mean_value, 2))),
            vjust = -2, hjust = -0.5, size = 3)

# Final Plot
ConMen_CTB <- ggplot(descriptives_CTB, aes(x = ConspMent_index, y = as.factor(COVID_CTB_dummy), fill = as.factor(COVID_CTB_dummy))) + 
  geom_density_ridges(scale = 0.8) +
  stat_density_ridges(quantile_lines = TRUE, quantile_fun = mean) +
  scale_fill_brewer(palette = "Set2", name = "COVID CTB") +
  labs(x = "Conspiracy Mentality", y = NULL) +
  theme_minimal() +
  theme(legend.position = "none")+
  geom_text(data = mean_values_CM_CTB, aes(x = mean_value, y = as.factor(COVID_CTB_dummy), 
                                    label = paste("", round(mean_value, 2))),
            vjust = -2, hjust = -0.5, size = 3)
ConMen_CTB

#### Truth is Political ----
mean_values_TiP_CTB <- descriptives_CTB %>%
  group_by(COVID_CTB_dummy) %>%
  summarise(mean_value = mean(EpistBel_Truth_index))

TiP_CTB <- ggplot(descriptives_CTB, aes(x = EpistBel_Truth_index, y = as.factor(COVID_CTB_dummy), fill = as.factor(COVID_CTB_dummy))) + 
  geom_density_ridges(scale = 0.8) +
  stat_density_ridges(quantile_lines = TRUE, quantile_fun = mean) +
  scale_fill_brewer(palette = "Set2", name = "COVID CTB") +
  labs(x = "Truth is Political", y = NULL) +
  theme_minimal() +
  theme(legend.position = "none")+
  geom_text(data = mean_values_TiP_CTB, aes(x = mean_value, y = as.factor(COVID_CTB_dummy), 
                                    label = paste("", round(mean_value, 2))),
            vjust = -2, hjust = -0.5, size = 3)
TiP_CTB

#### Political Orientation ----
PolOr_CTB <- ggplot(descriptives_CTB, aes(x = polorientation, y = as.factor(COVID_CTB_dummy), fill = as.factor(COVID_CTB_dummy))) + 
  geom_density_ridges(scale = 0.8) +
  stat_density_ridges(quantile_lines = TRUE, quantile_fun = mean) +
  scale_fill_brewer(palette = "Set2", name = "COVID CTB") +
  labs(x = "Political Orientation", y = NULL) +
  theme_minimal() +
  theme(legend.position = "none")
PolOr_CTB

# change bandwidth
PolOr_CTB <- ggplot(descriptives_CTB, aes(x = polorientation, y = as.factor(COVID_CTB_dummy), fill = as.factor(COVID_CTB_dummy))) + 
  geom_density_ridges(scale = 0.8) +
  stat_density_ridges(quantile_lines = TRUE, quantile_fun = mean, bandwidth = 0.4) +  # Adjust bandwidth here
  scale_fill_brewer(palette = "Set2", name = "COVID CTB") +
  labs(x = "Political Orientation", y = NULL) +
  theme_minimal() +
  theme(legend.position = "none")

# integrate mean labels
mean_values_PolOr_CTB <- descriptives_CTB %>%
  group_by(COVID_CTB_dummy) %>%
  summarise(mean_value = mean(polorientation))
PolOr_CTB <- ggplot(descriptives_CTB, aes(x = polorientation, y = as.factor(COVID_CTB_dummy), fill = as.factor(COVID_CTB_dummy))) + 
  geom_density_ridges(scale = 0.8) +
  stat_density_ridges(quantile_lines = TRUE, quantile_fun = mean, bandwidth = 0.4) +  # Adjust bandwidth here
  scale_fill_brewer(palette = "Set2", name = "COVID CTB") +
  labs(x = "Political Orientation", y = NULL) +
  theme_minimal() +
  theme(legend.position = "none")+
  geom_text(data = mean_values_PolOr_CTB, aes(x = mean_value, y = as.factor(COVID_CTB_dummy), 
                                    label = paste("", round(mean_value, 2))),
            vjust = -1, hjust = -0.5, size = 3) 
PolOr_CTB



### Create density plots for predictors of MBs ----
## Build dataset
descriptives_Misinfo<-dplyr::select(ds_analysis_paper2_CTBs_Ant,
                                    misinfo_covid_beliefs_dummy,
                                    ConspMent_index,
                                    EpistBel_IT_index,EpistBel_Truth_index,
                                    polorientation,religiousity,
                                    AltMed_index,
                                    TrustScience, TrustPolitical_index, TrustMedia_index, TrustAltMed_index,
                                    age,edu_high
                                    
) 

str(descriptives_Misinfo)

# Assign labels to each variable and factor level
descriptives_Misinfo <- descriptives_Misinfo %>%
  mutate(misinfo_covid_beliefs_dummy = set_label(misinfo_covid_beliefs_dummy, "Misinformation Belief (binary)"),
         ConspMent_index = set_label(ConspMent_index, "Conspiracy Mentality"),
         EpistBel_IT_index = set_label(EpistBel_IT_index, "Intuitive Thinking"),
         EpistBel_Truth_index = set_label(EpistBel_Truth_index, "Truth is Political"),
         polorientation = set_label(polorientation, "Political Orientation"),
         religiousity = set_label(religiousity, "Religiosity"),
         AltMed_index = set_label(AltMed_index, "Alternative Media Use"),
         TrustScience = set_label(TrustScience, "Trust in Science"),
         TrustMedia_index = set_label(TrustMedia_index, "Trust in Media"),
         TrustAltMed_index = set_label(TrustAltMed_index, "Trust in Alternative Media"),
         TrustPolitical_index = set_label(TrustPolitical_index, "Trust in Politics")  
  )
str(descriptives_Misinfo)


descriptives_Misinfo$misinfo_covid_beliefs_dummy <- factor(descriptives_Misinfo$misinfo_covid_beliefs_dummy,
                                                           levels = c(0, 1),
                                                           labels = c("Misinfo-Non-Believers", "Misinfo-Believers"))
freq(descriptives_Misinfo$misinfo_covid_beliefs_dummy)

#### Conspiracy Mentality ----
ConMen_Misinfo <- ggplot(descriptives_Misinfo, aes(x = ConspMent_index, y = as.factor(misinfo_covid_beliefs_dummy), fill = as.factor(misinfo_covid_beliefs_dummy))) + 
  geom_density_ridges(scale = 0.8) +
  stat_density_ridges(quantile_lines = TRUE, quantile_fun = mean) +
  scale_fill_brewer(palette = "Set2", name = "COVID CTB") +
  labs(x = "Conspiracy Mentality", y = NULL) +
  theme_minimal() +
  theme(legend.position = "none")
ConMen_Misinfo

# Add label for the mean
# Calculate means for each group
mean_values_CM_MB <- descriptives_Misinfo %>%
  group_by(misinfo_covid_beliefs_dummy) %>%
  summarise(mean_value = mean(ConspMent_index))

# Create the plot
ConMen_Misinfo <- ggplot(descriptives_Misinfo, aes(x = ConspMent_index, y = as.factor(misinfo_covid_beliefs_dummy), fill = as.factor(misinfo_covid_beliefs_dummy))) + 
  geom_density_ridges(scale = 0.8) +
  stat_density_ridges(quantile_lines = TRUE, quantile_fun = mean) +
  scale_fill_brewer(palette = "Set2", name = "COVID CTB") +
  labs(x = "Conspiracy Mentality", y = NULL) +
  theme_minimal() +
  theme(legend.position = "none")

# Add text labels for mean values
ConMen_Misinfo <- ConMen_Misinfo +
  geom_text(data = mean_values_CM_MB, aes(x = mean_value, y = as.factor(misinfo_covid_beliefs_dummy), 
                                    label = paste("", round(mean_value, 2))),
            vjust = -2, hjust = -0.5, size = 3)

# Final Plot
ConMen_Misinfo <- ggplot(descriptives_Misinfo, aes(x = ConspMent_index, y = as.factor(misinfo_covid_beliefs_dummy), fill = as.factor(misinfo_covid_beliefs_dummy))) + 
  geom_density_ridges(scale = 0.8) +
  stat_density_ridges(quantile_lines = TRUE, quantile_fun = mean) +
  scale_fill_brewer(palette = "Set2", name = "COVID CTB") +
  labs(x = "Conspiracy Mentality", y = NULL) +
  theme_minimal() +
  theme(legend.position = "none")+
  geom_text(data = mean_values_CM_MB, aes(x = mean_value, y = as.factor(misinfo_covid_beliefs_dummy), 
                                    label = paste("", round(mean_value, 2))),
            vjust = -2, hjust = -0.5, size = 3)
ConMen_Misinfo

#### Truth is Political ----
mean_values_TiP_MB <- descriptives_Misinfo %>%
  group_by(misinfo_covid_beliefs_dummy) %>%
  summarise(mean_value = mean(EpistBel_Truth_index))

TiP_Misinfo <- ggplot(descriptives_Misinfo, aes(x = EpistBel_Truth_index, y = as.factor(misinfo_covid_beliefs_dummy), fill = as.factor(misinfo_covid_beliefs_dummy))) + 
  geom_density_ridges(scale = 0.8) +
  stat_density_ridges(quantile_lines = TRUE, quantile_fun = mean) +
  scale_fill_brewer(palette = "Set2", name = "COVID CTB") +
  labs(x = "Truth is Political", y = NULL) +
  theme_minimal() +
  theme(legend.position = "none")+
  geom_text(data = mean_values_TiP_MB, aes(x = mean_value, y = as.factor(misinfo_covid_beliefs_dummy), 
                                           label = paste("", round(mean_value, 2))),
            vjust = -2, hjust = -0.5, size = 3)
TiP_Misinfo


### Combine plots on descriptives ----
(ConMen_CTB | TiP_CTB)  / (ConMen_Misinfo  | TiP_Misinfo) / (PolOr_CTB)

#### Remove labels ----
ConMen_CTB <- ConMen_CTB +
  labs(x = NULL, y = NULL)
TiP_CTB <- TiP_CTB +
  labs(x = NULL, y = NULL)

(ConMen_CTB | TiP_CTB)  / (ConMen_Misinfo  | TiP_Misinfo) / (PolOr_CTB)

#### Adjust value position ----
# CM CTBs
ConMen_CTB$layers <- ConMen_CTB$layers[!sapply(ConMen_CTB$layers, function(x) inherits(x$geom, "GeomText"))]

ConMen_CTB <- ConMen_CTB +
  geom_text(data = mean_values_CM_CTB, 
            aes(x = mean_value, y = as.factor(COVID_CTB_dummy), 
                label = paste("", round(mean_value, 2))),
            vjust = -1, hjust = -0.1, size = 3)
ConMen_CTB

# CM MBs
ConMen_Misinfo$layers <- ConMen_Misinfo$layers[!sapply(ConMen_Misinfo$layers, function(x) inherits(x$geom, "GeomText"))]

ConMen_Misinfo <- ConMen_Misinfo +
  geom_text(data = mean_values_CM_MB, 
            aes(x = mean_value, y = as.factor(misinfo_covid_beliefs_dummy), 
                label = paste("", round(mean_value, 2))),
            vjust = -1, hjust = -0.1, size = 3)
ConMen_Misinfo

# TiP MBs
TiP_Misinfo$layers <- TiP_Misinfo$layers[!sapply(TiP_Misinfo$layers, function(x) inherits(x$geom, "GeomText"))]

TiP_Misinfo <- TiP_Misinfo +
  geom_text(data = mean_values_TiP_MB, 
            aes(x = mean_value, y = as.factor(misinfo_covid_beliefs_dummy), 
                label = paste("", round(mean_value, 2))),
            vjust = -1, hjust = -0.1, size = 3)
TiP_Misinfo

# TiP CTB
TiP_CTB$layers <- TiP_CTB$layers[!sapply(TiP_CTB$layers, function(x) inherits(x$geom, "GeomText"))]

TiP_CTB <- TiP_CTB +
  geom_text(data = mean_values_TiP_CTB, 
            aes(x = mean_value, y = as.factor(COVID_CTB_dummy), 
                label = paste("", round(mean_value, 2))),
            vjust = -1, hjust = -0.1, size = 3)
TiP_CTB


# PolOr CTB
PolOr_CTB$layers <- PolOr_CTB$layers[!sapply(PolOr_CTB$layers, function(x) inherits(x$geom, "GeomText"))]

PolOr_CTB <- PolOr_CTB +
  geom_text(data = mean_values_PolOr_CTB, 
            aes(x = mean_value, y = as.factor(COVID_CTB_dummy), 
                label = paste("", round(mean_value, 2))),
            vjust = -1, hjust = -0.1, size = 3)
PolOr_CTB

(ConMen_CTB | TiP_CTB)  / (ConMen_Misinfo  | TiP_Misinfo) / (PolOr_CTB)


#### Integrate space ----
combined_plot <- ConMen_CTB + plot_spacer()  +  TiP_CTB + ConMen_Misinfo  + plot_spacer()  + TiP_Misinfo + plot_spacer() + plot_spacer()  + plot_spacer() + PolOr_CTB + plot_spacer() + plot_spacer() +
  plot_layout(ncol = 3) + 
  plot_layout(heights = c(4, 4, 1 ,4, 4, 1, 4)) + plot_layout(widths = c(4, 1, 4))
combined_plot

### Export final Plot ----
ggsave(
  "_figures/2_DensityPlot_Motives_CTB_and_Misinfo_combined.png",
  plot = combined_plot,
  width = 17,        
  height = 11,       
  dpi = 300         
)

## Plot descriptives of significant controls in one graph ----
#### Trust in Science CTB ----
mean_values <- descriptives_CTB %>%
  group_by(COVID_CTB_dummy) %>%
  summarise(mean_value = mean(TrustScience))

TrustS_CTB <- ggplot(descriptives_CTB, aes(x = TrustScience, y = as.factor(COVID_CTB_dummy), fill = as.factor(COVID_CTB_dummy))) + 
  geom_density_ridges(scale = 0.8) +
  stat_density_ridges(quantile_lines = TRUE, quantile_fun = mean) +
  scale_fill_brewer(palette = "Set2", name = "COVID CTB") +
  labs(x = "Trust in Science", y = NULL) +
  theme_minimal() +
  theme(legend.position = "none")+
  geom_text(data = mean_values, aes(x = mean_value, y = as.factor(COVID_CTB_dummy), 
                                    label = paste("", round(mean_value, 2))),
            vjust = -4, hjust = -0.2, size = 3) 
TrustS_CTB

#### Trust in Politics CTB ----
mean_values_TP_CTB <- descriptives_CTB %>%
  group_by(COVID_CTB_dummy) %>%
  summarise(mean_value = mean(TrustPolitical_index))

TrustP_CTB <- ggplot(descriptives_CTB, aes(x = TrustPolitical_index, y = as.factor(COVID_CTB_dummy), fill = as.factor(COVID_CTB_dummy))) + 
  geom_density_ridges(scale = 0.8) +
  stat_density_ridges(quantile_lines = TRUE, quantile_fun = mean) +
  scale_fill_brewer(palette = "Set2", name = "COVID CTB") +
  labs(x = "Trust in Politics", y = NULL) +
  theme_minimal() +
  theme(legend.position = "none")+
  geom_text(data = mean_values_TP_CTB, aes(x = mean_value, y = as.factor(COVID_CTB_dummy), 
                                    label = paste("", round(mean_value, 2))),
            vjust = -3, hjust = -0.1, size = 3) 
TrustP_CTB

#### Trust in Media CTB ----
mean_values <- descriptives_CTB %>%
  group_by(COVID_CTB_dummy) %>%
  summarise(mean_value = mean(TrustMedia_index))

TrustM_CTB <- ggplot(descriptives_CTB, aes(x = TrustMedia_index, y = as.factor(COVID_CTB_dummy), fill = as.factor(COVID_CTB_dummy))) + 
  geom_density_ridges(scale = 0.8) +
  stat_density_ridges(quantile_lines = TRUE, quantile_fun = mean) +
  scale_fill_brewer(palette = "Set2", name = "COVID CTB") +
  labs(x = "Trust in Media", y = NULL) +
  theme_minimal() +
  theme(legend.position = "none")+
  geom_text(data = mean_values, aes(x = mean_value, y = as.factor(COVID_CTB_dummy), 
                                    label = paste("", round(mean_value, 2))),
            vjust = -4, hjust = -0.2, size = 3) 
TrustM_CTB

#### Trust in Alternative Media CTB ----
mean_values <- descriptives_CTB %>%
  group_by(COVID_CTB_dummy) %>%
  summarise(mean_value = mean(TrustAltMed_index))

TrustAM_CTB <- ggplot(descriptives_CTB, aes(x = TrustAltMed_index, y = as.factor(COVID_CTB_dummy), fill = as.factor(COVID_CTB_dummy))) + 
  geom_density_ridges(scale = 0.8) +
  stat_density_ridges(quantile_lines = TRUE, quantile_fun = mean) +
  scale_fill_brewer(palette = "Set2", name = "COVID CTB") +
  labs(x = "Trust in Alternative Media", y = NULL) +
  theme_minimal() +
  theme(legend.position = "none")+
  geom_text(data = mean_values, aes(x = mean_value, y = as.factor(COVID_CTB_dummy), 
                                    label = paste("", round(mean_value, 2))),
            vjust = -4, hjust = -0.2, size = 3) 
TrustAM_CTB

#### Age CTB ----
mean_values_age_CTB <- descriptives_CTB %>%
  group_by(COVID_CTB_dummy) %>%
  summarise(mean_value = mean(age))

Age_CTB <- ggplot(descriptives_CTB, aes(x = age, y = as.factor(COVID_CTB_dummy), fill = as.factor(COVID_CTB_dummy))) + 
  geom_density_ridges(scale = 0.8) +
  stat_density_ridges(quantile_lines = TRUE, quantile_fun = mean) +
  scale_fill_brewer(palette = "Set2", name = "COVID CTB") +
  labs(x = "Age", y = NULL) +
  theme_minimal() +
  theme(legend.position = "none")+
  geom_text(data = mean_values_age_CTB, aes(x = mean_value, y = as.factor(COVID_CTB_dummy), 
                                            label = paste("", round(mean_value, 2))),
            vjust = -2, hjust = -0.5, size = 3)
Age_CTB

#### Trust in Alternative Media MBs ----
mean_values_TAM <- descriptives_Misinfo %>%
  group_by(misinfo_covid_beliefs_dummy) %>%
  summarise(mean_value = mean(TrustAltMed_index))

TMediaAlt_Misinfo <- ggplot(descriptives_Misinfo, aes(x = TrustAltMed_index, y = as.factor(misinfo_covid_beliefs_dummy), fill = as.factor(misinfo_covid_beliefs_dummy))) + 
  geom_density_ridges(scale = 0.8) +
  stat_density_ridges(quantile_lines = TRUE, quantile_fun = mean) +
  scale_fill_brewer(palette = "Set2", name = "COVID CTB") +
  labs(x = "Trust in Alternative Media", y = NULL) +
  theme_minimal() +
  theme(legend.position = "none") +
  geom_text(data = mean_values_TAM, aes(x = mean_value, y = as.factor(misinfo_covid_beliefs_dummy), 
                                        label = paste("", round(mean_value, 2))),
            vjust = -2, hjust = -0.5, size = 3)
TMediaAlt_Misinfo


#### Trust in Science MBs ----
mean_values_tscience_misinfo <- descriptives_Misinfo %>%
  group_by(misinfo_covid_beliefs_dummy) %>%
  summarise(mean_value = mean(TrustScience))

TScience_Misinfo <- ggplot(descriptives_Misinfo, aes(x = TrustScience, y = as.factor(misinfo_covid_beliefs_dummy), fill = as.factor(misinfo_covid_beliefs_dummy))) + 
  geom_density_ridges(scale = 0.8) +
  stat_density_ridges(quantile_lines = TRUE, quantile_fun = mean) +
  scale_fill_brewer(palette = "Set2", name = "COVID CTB") +
  labs(x = "Trust in Science", y = NULL) +
  theme_minimal() +
  theme(legend.position = "none")+
  geom_text(data = mean_values_tscience_misinfo, aes(x = mean_value, y = as.factor(misinfo_covid_beliefs_dummy), 
                                                     label = paste("", round(mean_value, 2))),
            vjust = -2, hjust = -0.5, size = 3)
TScience_Misinfo

#### Trust in Politics MBs ----
mean_values_political <- descriptives_Misinfo %>%
  group_by(misinfo_covid_beliefs_dummy) %>%
  summarise(mean_value = mean(TrustPolitical_index))

TPolitical_Misinfo <- ggplot(descriptives_Misinfo, aes(x = TrustPolitical_index, y = as.factor(misinfo_covid_beliefs_dummy), fill = as.factor(misinfo_covid_beliefs_dummy))) + 
  geom_density_ridges(scale = 0.8) +
  stat_density_ridges(quantile_lines = TRUE, quantile_fun = mean) +
  scale_fill_brewer(palette = "Set2", name = "COVID CTB") +
  labs(x = "Trust in Politics", y = NULL) +
  theme_minimal() +
  theme(legend.position = "none") +
  geom_text(data = mean_values_political, aes(x = mean_value, y = as.factor(misinfo_covid_beliefs_dummy), 
                                              label = paste("", round(mean_value, 2))),
            vjust = -2, hjust = -0.1, size = 3)
TPolitical_Misinfo

#### Age MBs ----
mean_values_age_MB <- descriptives_Misinfo %>%
  group_by(misinfo_covid_beliefs_dummy) %>%
  summarise(mean_value = mean(age))

Age_Misinfo <- ggplot(descriptives_Misinfo, aes(x = age, y = as.factor(misinfo_covid_beliefs_dummy), fill = as.factor(misinfo_covid_beliefs_dummy))) + 
  geom_density_ridges(scale = 0.8) +
  stat_density_ridges(quantile_lines = TRUE, quantile_fun = mean) +
  scale_fill_brewer(palette = "Set2", name = "COVID CTB") +
  labs(x = "Age", y = NULL) +
  theme_minimal() +
  theme(legend.position = "none")+
  geom_text(data = mean_values_age_MB, aes(x = mean_value, y = as.factor(misinfo_covid_beliefs_dummy), 
                                    label = paste("", round(mean_value, 2))),
            vjust = -2, hjust = -0.5, size = 3)
Age_Misinfo


### Combine plots ----
(TScience_Misinfo | TPolitical_Misinfo)  / (TrustS_CTB  | TrustP_CTB) / (TMediaAlt_Misinfo  | Age_Misinfo) / (TrustAM_CTB  | Age_CTB)  / TrustM_CTB

#### Remove labels ----
TScience_Misinfo <- TScience_Misinfo +
  labs(x = NULL, y = NULL)
TPolitical_Misinfo <- TPolitical_Misinfo +
  labs(x = NULL, y = NULL)
TMediaAlt_Misinfo <- TMediaAlt_Misinfo +
  labs(x = NULL, y = NULL)
Age_Misinfo <- Age_Misinfo +
  labs(x = NULL, y = NULL)

(TScience_Misinfo | TPolitical_Misinfo)  / (TrustS_CTB  | TrustP_CTB) / (TMediaAlt_Misinfo  | Age_Misinfo) / (TrustAM_CTB  | Age_CTB)  / TrustM_CTB

#### Integrate space ----
combined_plot <- (
  TScience_Misinfo + plot_spacer() + TPolitical_Misinfo + 
    TrustS_CTB + plot_spacer() + TrustP_CTB + 
    plot_spacer() + plot_spacer() + plot_spacer() + 
    TMediaAlt_Misinfo + plot_spacer() + Age_Misinfo +
    TrustAM_CTB + plot_spacer() + Age_CTB +
    plot_spacer() + plot_spacer() + plot_spacer() + 
    TrustM_CTB + plot_spacer() + plot_spacer()
) + 
  plot_layout(
    ncol = 3,
    heights = c(4, 4, 1, 4, 4, 1, 4),
    widths = c(4, 1, 4)
  )
combined_plot

### Export final Plot ----
ggsave(
  "_figures/2_DensityPlot_Controls_CTB_and_Misinfo_combined.png",
  plot = combined_plot,
  width = 17,        
  height = 11,       
  dpi = 300         
)
# --------------------------------------------------------------------------------
## Descriptives of significant controls for the text ----
### Trust in Science ----
ctb_stats <- descriptives_CTB %>%
  group_by(COVID_CTB_dummy) %>%
  summarise(
    M = mean(TrustScience, na.rm = TRUE),
    SD = sd(TrustScience, na.rm = TRUE)
  )

misinfo_stats <- descriptives_Misinfo %>%
  group_by(misinfo_covid_beliefs_dummy) %>%
  summarise(
    M = mean(TrustScience, na.rm = TRUE),
    SD = sd(TrustScience, na.rm = TRUE)
  )

ctb_stats
misinfo_stats

sprintf(
  "CT: M (SD)believers = %.2f (%.2f); M (SD)non-bel. = %.2f (%.2f); Misinformation: M (SD)believers = %.2f (%.2f); M (SD)non-bel. = %.2f (%.2f).",
  ctb_stats$M[ctb_stats$COVID_CTB_dummy == "CT-Believers"],
  ctb_stats$SD[ctb_stats$COVID_CTB_dummy == "CT-Believers"],
  ctb_stats$M[ctb_stats$COVID_CTB_dummy == "CT-Non-Believers"],
  ctb_stats$SD[ctb_stats$COVID_CTB_dummy == "CT-Non-Believers"],
  misinfo_stats$M[misinfo_stats$misinfo_covid_beliefs_dummy == "Misinfo-Believers"],
  misinfo_stats$SD[misinfo_stats$misinfo_covid_beliefs_dummy == "Misinfo-Believers"],
  misinfo_stats$M[misinfo_stats$misinfo_covid_beliefs_dummy == "Misinfo-Non-Believers"],
  misinfo_stats$SD[misinfo_stats$misinfo_covid_beliefs_dummy == "Misinfo-Non-Believers"]
)
### Trust in Politics ----
library(dplyr)

ctb_stats_political <- descriptives_CTB %>%
  group_by(COVID_CTB_dummy) %>%
  summarise(
    M = mean(TrustPolitical_index, na.rm = TRUE),
    SD = sd(TrustPolitical_index, na.rm = TRUE)
  )

misinfo_stats_political <- descriptives_Misinfo %>%
  group_by(misinfo_covid_beliefs_dummy) %>%
  summarise(
    M = mean(TrustPolitical_index, na.rm = TRUE),
    SD = sd(TrustPolitical_index, na.rm = TRUE)
  )

ctb_stats_political
misinfo_stats_political

sprintf(
  "CT: M (SD)believers = %.2f (%.2f); M (SD)non-bel. = %.2f (%.2f); Misinformation: M (SD)believers = %.2f (%.2f); M (SD)non-bel. = %.2f (%.2f).",
  ctb_stats_political$M[ctb_stats_political$COVID_CTB_dummy == "CT-Believers"],
  ctb_stats_political$SD[ctb_stats_political$COVID_CTB_dummy == "CT-Believers"],
  ctb_stats_political$M[ctb_stats_political$COVID_CTB_dummy == "CT-Non-Believers"],
  ctb_stats_political$SD[ctb_stats_political$COVID_CTB_dummy == "CT-Non-Believers"],
  misinfo_stats_political$M[misinfo_stats_political$misinfo_covid_beliefs_dummy == "Misinfo-Believers"],
  misinfo_stats_political$SD[misinfo_stats_political$misinfo_covid_beliefs_dummy == "Misinfo-Believers"],
  misinfo_stats_political$M[misinfo_stats_political$misinfo_covid_beliefs_dummy == "Misinfo-Non-Believers"],
  misinfo_stats_political$SD[misinfo_stats_political$misinfo_covid_beliefs_dummy == "Misinfo-Non-Believers"]
)

### Trust in Alternative Media ----
ctb_stats_altmed <- descriptives_CTB %>%
  group_by(COVID_CTB_dummy) %>%
  summarise(
    M = mean(TrustAltMed_index, na.rm = TRUE),
    SD = sd(TrustAltMed_index, na.rm = TRUE)
  )

misinfo_stats_altmed <- descriptives_Misinfo %>%
  group_by(misinfo_covid_beliefs_dummy) %>%
  summarise(
    M = mean(TrustAltMed_index, na.rm = TRUE),
    SD = sd(TrustAltMed_index, na.rm = TRUE)
  )

ctb_stats_altmed
misinfo_stats_altmed

sprintf(
  "CT: M (SD)believers = %.2f (%.2f); M (SD)non-bel. = %.2f (%.2f); Misinformation: M (SD)believers = %.2f (%.2f); M (SD)non-bel. = %.2f (%.2f).",
  ctb_stats_altmed$M[ctb_stats_altmed$COVID_CTB_dummy == "CT-Believers"],
  ctb_stats_altmed$SD[ctb_stats_altmed$COVID_CTB_dummy == "CT-Believers"],
  ctb_stats_altmed$M[ctb_stats_altmed$COVID_CTB_dummy == "CT-Non-Believers"],
  ctb_stats_altmed$SD[ctb_stats_altmed$COVID_CTB_dummy == "CT-Non-Believers"],
  misinfo_stats_altmed$M[misinfo_stats_altmed$misinfo_covid_beliefs_dummy == "Misinfo-Believers"],
  misinfo_stats_altmed$SD[misinfo_stats_altmed$misinfo_covid_beliefs_dummy == "Misinfo-Believers"],
  misinfo_stats_altmed$M[misinfo_stats_altmed$misinfo_covid_beliefs_dummy == "Misinfo-Non-Believers"],
  misinfo_stats_altmed$SD[misinfo_stats_altmed$misinfo_covid_beliefs_dummy == "Misinfo-Non-Believers"]
)
### Trust in Media ----
ctb_stats_media <- descriptives_CTB %>%
  group_by(COVID_CTB_dummy) %>%
  summarise(
    M = mean(TrustMedia_index, na.rm = TRUE),
    SD = sd(TrustMedia_index, na.rm = TRUE)
  )

ctb_stats_media

sprintf(
  "CT: M (SD)believers = %.2f (%.2f); M (SD)non-bel. = %.2f (%.2f).",
  ctb_stats_media$M[ctb_stats_media$COVID_CTB_dummy == "CT-Believers"],
  ctb_stats_media$SD[ctb_stats_media$COVID_CTB_dummy == "CT-Believers"],
  ctb_stats_media$M[ctb_stats_media$COVID_CTB_dummy == "CT-Non-Believers"],
  ctb_stats_media$SD[ctb_stats_media$COVID_CTB_dummy == "CT-Non-Believers"]
)

### Age ----
ctb_stats_age <- descriptives_CTB %>%
  group_by(COVID_CTB_dummy) %>%
  summarise(
    M = mean(age, na.rm = TRUE),
    SD = sd(age, na.rm = TRUE)
  )

misinfo_stats_age <- descriptives_Misinfo %>%
  group_by(misinfo_covid_beliefs_dummy) %>%
  summarise(
    M = mean(age, na.rm = TRUE),
    SD = sd(age, na.rm = TRUE)
  )

ctb_stats_age
misinfo_stats_age

sprintf(
  "CT: M (SD)believers = %.2f (%.2f); M (SD)non-bel. = %.2f (%.2f); Misinformation: M (SD)believers = %.2f (%.2f); M (SD)non-bel. = %.2f (%.2f).",
  ctb_stats_age$M[ctb_stats_age$COVID_CTB_dummy == "CT-Believers"],
  ctb_stats_age$SD[ctb_stats_age$COVID_CTB_dummy == "CT-Believers"],
  ctb_stats_age$M[ctb_stats_age$COVID_CTB_dummy == "CT-Non-Believers"],
  ctb_stats_age$SD[ctb_stats_age$COVID_CTB_dummy == "CT-Non-Believers"],
  misinfo_stats_age$M[misinfo_stats_age$misinfo_covid_beliefs_dummy == "Misinfo-Believers"],
  misinfo_stats_age$SD[misinfo_stats_age$misinfo_covid_beliefs_dummy == "Misinfo-Believers"],
  misinfo_stats_age$M[misinfo_stats_age$misinfo_covid_beliefs_dummy == "Misinfo-Non-Believers"],
  misinfo_stats_age$SD[misinfo_stats_age$misinfo_covid_beliefs_dummy == "Misinfo-Non-Believers"]
)

### Education ----
ctb_edu <- ds_analysis_paper2_CTBs_Ant %>%
  filter(edu_high %in% c("High Education", "Low Education")) %>%
  group_by(COVID_CTB_dummy) %>%
  summarise(
    perc_high_edu = mean(edu_high == "High Education", na.rm = TRUE) * 100
  )
misinfo_edu <- ds_analysis_paper2_CTBs_Ant %>%
  filter(edu_high %in% c("High Education", "Low Education")) %>%
  group_by(misinfo_covid_beliefs_dummy) %>%
  summarise(
    perc_high_edu = mean(edu_high == "High Education", na.rm = TRUE) * 100
  )

ctb_edu
misinfo_edu

sprintf(
  "CT: %.1f%% of non-believers and %.1f%% of believers have high education; Misinformation: %.1f%% of non-believers and %.1f%% of believers have high education.",
  ctb_edu$perc_high_edu[ctb_edu$COVID_CTB_dummy == "0"],
  ctb_edu$perc_high_edu[ctb_edu$COVID_CTB_dummy == "1"],
  misinfo_edu$perc_high_edu[misinfo_edu$misinfo_covid_beliefs_dummy == "0"],
  misinfo_edu$perc_high_edu[misinfo_edu$misinfo_covid_beliefs_dummy == "1"]
)

--------------------------------------------------
# --------------------------------------------------------------------------------
## Contextualization of results ----
### Because distributions are very similar: check overlap between CT and Misinfo-Believers ----
count_both_ones <- ds_analysis_paper2_CTBs_Ant %>%
  filter(COVID_CTB_dummy == 1 & misinfo_covid_beliefs_dummy == 1) %>%
  nrow()
print(count_both_ones)

freq(ds_analysis_paper2_CTBs_Ant$COVID_CTB_dummy)
freq(ds_analysis_paper2_CTBs_Ant$misinfo_covid_beliefs_dummy)
cor(ds_analysis_paper2_CTBs_Ant$COVID_CTB, ds_analysis_paper2_CTBs_Ant$misinfo_covid_beliefs)
## Result: 1162 out of 1553 (CT) respectively 1433 (Misinfo) also believe in the other kind of distorted information
### Check Interactions ----
ds_analysis_paper2_CTBs_Ant <- ds_analysis_paper2_CTBs_Ant %>%
  mutate(
    new_factor = interaction(misinfo_covid_beliefs_dummy, COVID_CTB_dummy)
  )
freq(ds_analysis_paper2_CTBs_Ant$new_factor)

### Describe extreme groups for epistemic motives ----
#### Truth is Political in CT ----
freq(ds_analysis_paper2_CTBs_Ant$COVID_CTB_dummy)
freq(ds_analysis_paper2_CTBs_Ant$EpistBel_Truth_index)

group_counts <- ds_analysis_paper2_CTBs_Ant %>%
  filter(EpistBel_Truth_index >= 6) %>%
  group_by(COVID_CTB_dummy) %>%
  summarise(count = n())

total_counts <- ds_analysis_paper2_CTBs_Ant %>%
  group_by(COVID_CTB_dummy) %>%
  summarise(total = n())

result <- merge(group_counts, total_counts, by = "COVID_CTB_dummy")

result <- result %>%
  mutate(percentage = (count / total) * 100)

print(result)

#### Truth is Political in Misinformation ----
freq(ds_analysis_paper2_CTBs_Ant$misinfo_covid_beliefs_dummy)
freq(ds_analysis_paper2_CTBs_Ant$EpistBel_Truth_index)

group_counts <- ds_analysis_paper2_CTBs_Ant %>%
  filter(EpistBel_Truth_index >= 6) %>%
  group_by(misinfo_covid_beliefs_dummy) %>%
  summarise(count = n())

total_counts <- ds_analysis_paper2_CTBs_Ant %>%
  group_by(misinfo_covid_beliefs_dummy) %>%
  summarise(total = n())

result <- merge(group_counts, total_counts, by = "misinfo_covid_beliefs_dummy")

result <- result %>%
  mutate(percentage = (count / total) * 100)

print(result)

# --------------------------------------------------------------------------------
# ROBUSTNESS CHECK: ANTECEDENTS OF CTBs (continuous): MULTIVARIATE LINEAR REGRESSION ----
# --------------------------------------------------------------------------------
## Build dataset ----
ds_analysis_paper2_CTBs_Ant<-dplyr::select(ds_MisInfoCT_analysis,
                                           COVID_CTB,misinfo_covid_beliefs,
                                           ConspMent_index,
                                           MediaUse_traditional_index,MediaUse_online_index,
                                           MediaUse_socialMedia_index,AltMed_index,
                                           EpistBel_IT_index,EpistBel_Truth_index,EpistBel_AOT_index,
                                           SDO_index,
                                           TrustMedia_index,TrustAltMed_index,TrustScience,TrustPolitical_index,
                                           gender_male,age,edu_high,religiousity,polorientation
) 

str(ds_analysis_paper2_CTBs_Ant)

# Assign labels to each variable
ds_analysis_paper2_CTBs_Ant <- ds_analysis_paper2_CTBs_Ant %>%
  mutate(COVID_CTB = set_label(COVID_CTB, "COVID Conspiracy Theory Belief (continous)"),
         misinfo_covid_beliefs = set_label(misinfo_covid_beliefs, "Misinformation COVID Beliefs (continous)"),
         ConspMent_index = set_label(ConspMent_index, "Conspiracy Mentality"),
         MediaUse_traditional_index = set_label(MediaUse_traditional_index, "Traditional Media Use"),
         MediaUse_online_index = set_label(MediaUse_online_index, "Online Media Use"),
         MediaUse_socialMedia_index = set_label(MediaUse_socialMedia_index, "Social Media Use"),
         AltMed_index = set_label(AltMed_index, "Alternative Media"),
         EpistBel_IT_index = set_label(EpistBel_IT_index, "Intuitive Thinking"),
         EpistBel_Truth_index = set_label(EpistBel_Truth_index, "Truth is Political"),
         EpistBel_AOT_index = set_label(EpistBel_AOT_index, "Actively Open-Minded Thinking"),
         SDO_index = set_label(SDO_index, "Social Dominance Orientation"),
         TrustMedia_index = set_label(TrustMedia_index, "Trust in Media"),
         TrustAltMed_index = set_label(TrustAltMed_index, "Trust in Alternative Media"),
         TrustScience = set_label(TrustScience, "Trust in Science"),
         TrustPolitical_index = set_label(TrustPolitical_index, "Trust in Politics"),
         gender_male = set_label(gender_male, "Gender Male"),
         age = set_label(age, "Age"),
         edu_high = set_label(edu_high, "Education High"),
         religiousity = set_label(religiousity, "Religiosity"),
         polorientation = set_label(polorientation, "Political Orientation"))

## Fit the initial linear regression model ----
model_lin <- lm(COVID_CTB ~ ConspMent_index +
                  EpistBel_Truth_index + AltMed_index + 
                  EpistBel_IT_index + polorientation + 
                  MediaUse_socialMedia_index + religiousity +
                  SDO_index +  gender_male + EpistBel_AOT_index +
                  MediaUse_traditional_index +  age + 
                  MediaUse_online_index + edu_high +
                  TrustScience + TrustPolitical_index + TrustMedia_index + TrustAltMed_index, 
                data = ds_analysis_paper2_CTBs_Ant)
summary(model_lin)

report::report_table(model_lin)

## Check assumptions ----
### Assumption 1: Linear relationship between continous predictors and outcome ----

#### Scatterplots with loess-line  ----
# Define your data frame
data <- ds_analysis_paper2_CTBs_Ant
# Create scatterplots for each numeric predictor
p1 <- ggplot(data, aes(x = ConspMent_index, y = COVID_CTB)) +
  geom_point(alpha = 0.6) + 
  geom_smooth(method = "loess", se = FALSE, color = "blue") +
  labs(x = "Conspiracy Mentality Index",
       y = "COVID CTB") +
  theme_minimal()


p2 <- ggplot(data, aes(x = MediaUse_traditional_index, y = COVID_CTB)) +
  geom_point(alpha = 0.6) + 
  geom_smooth(method = "loess", se = FALSE, color = "blue") +
  labs(x = "Media Use Traditional Index",
       y = "COVID CTB") +
  theme_minimal()

p3 <- ggplot(data, aes(x = MediaUse_online_index, y = COVID_CTB)) +
  geom_point(alpha = 0.6) + 
  geom_smooth(method = "loess", se = FALSE, color = "blue") +
  labs(x = "Media Use Online Index",
       y = "COVID CTB") +
  theme_minimal()

p4 <- ggplot(data, aes(x = MediaUse_socialMedia_index, y = COVID_CTB)) +
  geom_point(alpha = 0.6) + 
  geom_smooth(method = "loess", se = FALSE, color = "blue") +
  labs(x = "Social Media Use Total Index",
       y = "COVID CTB") +
  theme_minimal()

p5 <- ggplot(data, aes(x = AltMed_index, y = COVID_CTB)) +
  geom_point(alpha = 0.6) + 
  geom_smooth(method = "loess", se = FALSE, color = "blue") +
  labs(x = "Alternative Media Use Index",
       y = "COVID CTB") +
  theme_minimal()

p6 <- ggplot(data, aes(x = EpistBel_IT_index, y = COVID_CTB)) +
  geom_point(alpha = 0.6) + 
  geom_smooth(method = "loess", se = FALSE, color = "blue") +
  labs(x = "Epistemic Belief IT Index",
       y = "COVID CTB") +
  theme_minimal()

p7 <- ggplot(data, aes(x = EpistBel_Truth_index, y = COVID_CTB)) +
  geom_point(alpha = 0.6) + 
  geom_smooth(method = "loess", se = FALSE, color = "blue") +
  labs(x = "Epistemic Belief Truth Index",
       y = "COVID CTB") +
  theme_minimal()

p8 <- ggplot(data, aes(x = EpistBel_AOT_index, y = COVID_CTB)) +
  geom_point(alpha = 0.6) + 
  geom_smooth(method = "loess", se = FALSE, color = "blue") +
  labs(x = "Epistemic Belief AOT Index",
       y = "COVID CTB") +
  theme_minimal()

p9 <- ggplot(data, aes(x = SDO_index, y = COVID_CTB)) +
  geom_point(alpha = 0.6) + 
  geom_smooth(method = "loess", se = FALSE, color = "blue") +
  labs(x = "SDO Index",
       y = "COVID CTB") +
  theme_minimal()

p10 <- ggplot(data, aes(x = TrustMedia_index, y = COVID_CTB)) +
  geom_point(alpha = 0.6) + 
  geom_smooth(method = "loess", se = FALSE, color = "blue") +
  labs(x = "Trust in Media Index",
       y = "COVID CTB") +
  theme_minimal()

p11 <- ggplot(data, aes(x = TrustScience, y = COVID_CTB)) +
  geom_point(alpha = 0.6) + 
  geom_smooth(method = "loess", se = FALSE, color = "blue") +
  labs(x = "Trust in Science",
       y = "COVID CTB") +
  theme_minimal()

p12 <- ggplot(data, aes(x = TrustPolitical_index, y = COVID_CTB)) +
  geom_point(alpha = 0.6) + 
  geom_smooth(method = "loess", se = FALSE, color = "blue") +
  labs(x = "Trust in Political Index",
       y = "COVID CTB") +
  theme_minimal()

p13 <- ggplot(data, aes(x = age, y = COVID_CTB)) +
  geom_point(alpha = 0.6) + 
  geom_smooth(method = "loess", se = FALSE, color = "blue") +
  labs(x = "Age",
       y = "COVID CTB") +
  theme_minimal()

p14 <- ggplot(data, aes(x = religiousity, y = COVID_CTB)) +
  geom_point(alpha = 0.6) + 
  geom_smooth(method = "loess", se = FALSE, color = "blue") +
  labs(x = "Religiousity",
       y = "COVID CTB") +
  theme_minimal()

p15 <- ggplot(data, aes(x = polorientation, y = COVID_CTB)) +
  geom_point(alpha = 0.6) + 
  geom_smooth(method = "loess", se = FALSE, color = "blue") +
  labs(x = "Political Orientation",
       y = "COVID CTB") +
  theme_minimal()


p16 <- ggplot(data, aes(x = TrustAltMed_index, y = COVID_CTB)) +
  geom_point(alpha = 0.6) + 
  geom_smooth(method = "loess", se = FALSE, color = "blue") +
  labs(x = "Trust in Media Index",
       y = "COVID CTB") +
  theme_minimal()


# Combine the scatterplots into a grid
grid.arrange(p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13, p14, p15, p16, ncol = 3)

#### Rainbow-Test ----
raintest(model_lin)
### Result: assumption of linearity not violated

### Assumption 2: Homoscedasticity (equality of variances) of the residuals ----
plot(model_lin, which = 1)
ols_test_breusch_pagan(model_lin)
#### Result: assumption violated, heteroscedasticity detected
### Assumption 3: Normality of residuals  ----
# QQ-Plot
plot(model_lin, which = 2)
# Histogram of residuals
ols_plot_resid_hist(model_lin)
# Shapiro-Wilk-Test
shapiro.test(model_lin$residuals)
#### Result: assumption violated, non-normality detected

### Assumption 4: No multicollinearity  ----
car::vif(model_lin)
#### Result: assumption given, none of the VIF values ≥ 10

### Assumption 5: No outliers  ----
# Cook's Distanz
plot(model_lin, 4)

# Identify cases where cooks distance is > 4/N
cooks_distance <- cooks.distance(model_lin)
influential_points <- which(cooks_distance > 4/length(cooks_distance))
influential_points

# Identify cases where cooks distance is > 1/N
cooks_distance <- cooks.distance(model_lin)
influential_points <- which(cooks_distance > 1/length(cooks_distance))
influential_points
#### Result: assumption violated, several outliers

## Fit the final linear regression model ----
### with robust standard errors to account for heteroskedasticity ----

coeftest(model_lin, vcov = vcovHC(model_lin, "HC1"))
### with bootstrapping to account for non-normality of residuals ----
set.seed(12345)
model_CTB_boot <- lm(COVID_CTB ~ ConspMent_index +
                       MediaUse_traditional_index + MediaUse_online_index +
                       MediaUse_socialMedia_index + AltMed_index +
                       EpistBel_IT_index + EpistBel_Truth_index + EpistBel_AOT_index +
                       SDO_index +
                       TrustMedia_index + TrustAltMed_index + TrustScience + TrustPolitical_index +
                       gender_male + age + edu_high + religiousity + polorientation, data = ds_analysis_paper2_CTBs_Ant)
fit_CTB_boot <- Boot(model_CTB_boot, R = 5000)
summary(fit_CTB_boot)
confint_CTB<-confint(fit_CTB_boot, level = .95)
hist(fit_CTB_boot)

### with iterated re-weighted least squares (IRLS) to account for outliers ----
summary(rr.huber <- rlm(COVID_CTB ~ ConspMent_index +
                          MediaUse_traditional_index + MediaUse_online_index +
                          MediaUse_socialMedia_index + AltMed_index +
                          EpistBel_IT_index + EpistBel_Truth_index + EpistBel_AOT_index +
                          SDO_index +
                          TrustMedia_index + TrustAltMed_index + TrustScience + TrustPolitical_index +
                          gender_male + age + edu_high + religiousity + polorientation, 
                        data = ds_analysis_paper2_CTBs_Ant))

## Manuelly compare the models accounting for violations of assumptions with initial linear regression model ----
## Export linear regression model as table and graph using sjplot ----
tab_model(model_lin,show.std=TRUE,p.style = "stars",file = "_tables/2_lm_CTBs.html")

plot_lm<-sjPlot::plot_model(model_lin, sort.est = TRUE,type = "std",show.values = TRUE,value.offset = .4) 
plot_lm
ggsave("_figures/2_lm_CTBs.jpg", plot = plot_lm)

# --------------------------------------------------------------------------------
# ROBUSTNESS CHECK: ANTECEDENTS OF MISINFO (continuous): MULTIVARIATE LINEAR REGRESSION ----
# --------------------------------------------------------------------------------
## Build dataset ----
ds_analysis_paper2_Misinfo_Ant<-dplyr::select(ds_MisInfoCT_analysis,
                                              misinfo_covid_beliefs,
                                              ConspMent_index,
                                              MediaUse_traditional_index,MediaUse_online_index,
                                              MediaUse_socialMedia_index,AltMed_index,
                                              EpistBel_IT_index,EpistBel_Truth_index,EpistBel_AOT_index,
                                              SDO_index,
                                              TrustMedia_index,TrustAltMed_index, TrustScience,TrustPolitical_index,
                                              gender_male,age,edu_high,religiousity,polorientation
) 

str(ds_analysis_paper2_Misinfo_Ant)

# Assign labels to each variable
ds_analysis_paper2_Misinfo_Ant <- ds_analysis_paper2_Misinfo_Ant %>%
  mutate(misinfo_covid_beliefs = set_label(misinfo_covid_beliefs, "Misinformation COVID Beliefs (continous)"),
         ConspMent_index = set_label(ConspMent_index, "Conspiracy Mentality"),
         MediaUse_traditional_index = set_label(MediaUse_traditional_index, "Traditional Media Use"),
         MediaUse_online_index = set_label(MediaUse_online_index, "Online Media Use"),
         MediaUse_socialMedia_index = set_label(MediaUse_socialMedia_index, "Social Media Use"),
         AltMed_index = set_label(AltMed_index, "Alternative Media"),
         EpistBel_IT_index = set_label(EpistBel_IT_index, "Intuitive Thinking"),
         EpistBel_Truth_index = set_label(EpistBel_Truth_index, "Truth is Political"),
         EpistBel_AOT_index = set_label(EpistBel_AOT_index, "Actively Open-Minded Thinking"),
         SDO_index = set_label(SDO_index, "Social Dominance Orientation"),
         TrustMedia_index = set_label(TrustMedia_index, "Trust in Media"),
         TrustAltMed_index = set_label(TrustAltMed_index, "Trust in Alternative Media"),
         TrustScience = set_label(TrustScience, "Trust in Science"),
         TrustPolitical_index = set_label(TrustPolitical_index, "Trust in Politics"),
         gender_male = set_label(gender_male, "Gender Male"),
         age = set_label(age, "Age"),
         edu_high = set_label(edu_high, "Education High"),
         religiousity = set_label(religiousity, "Religiosity"),
         polorientation = set_label(polorientation, "Political Orientation"))

## Fit the initial linear regression model ----
model_lin <- lm(misinfo_covid_beliefs ~ ConspMent_index +
                  EpistBel_Truth_index + AltMed_index + 
                  EpistBel_IT_index + polorientation + 
                  MediaUse_socialMedia_index + religiousity +
                  SDO_index +  gender_male + EpistBel_AOT_index +
                  MediaUse_traditional_index +  age + 
                  MediaUse_online_index + edu_high +
                  TrustScience + TrustPolitical_index + TrustMedia_index + TrustAltMed_index, 
                data = ds_analysis_paper2_Misinfo_Ant)
summary(model_lin)

report::report_table(model_lin)

## Check assumptions ----
### Assumption 1: Linear relationship between continous predictors and outcome ----

#### Scatterplots with loess-line following http://www.regorz-statistik.de/inhalte/tutorial_regression_linearitaet.html#folgen  ----
# Define your data frame
data <- ds_analysis_paper2_Misinfo_Ant
# Create scatterplots for each numeric predictor
p1 <- ggplot(data, aes(x = ConspMent_index, y = misinfo_covid_beliefs)) +
  geom_point(alpha = 0.6) + 
  geom_smooth(method = "loess", se = FALSE, color = "blue") +
  labs(x = "Conspiracy Mentality Index",
       y = "COVID CTB") +
  theme_minimal()
p1

p2 <- ggplot(data, aes(x = MediaUse_traditional_index, y = misinfo_covid_beliefs)) +
  geom_point(alpha = 0.6) + 
  geom_smooth(method = "loess", se = FALSE, color = "blue") +
  labs(x = "Media Use Traditional Index",
       y = "COVID CTB") +
  theme_minimal()

p3 <- ggplot(data, aes(x = MediaUse_online_index, y = misinfo_covid_beliefs)) +
  geom_point(alpha = 0.6) + 
  geom_smooth(method = "loess", se = FALSE, color = "blue") +
  labs(x = "Media Use Online Index",
       y = "COVID CTB") +
  theme_minimal()

p4 <- ggplot(data, aes(x = MediaUse_socialMedia_index, y = misinfo_covid_beliefs)) +
  geom_point(alpha = 0.6) + 
  geom_smooth(method = "loess", se = FALSE, color = "blue") +
  labs(x = "Social Media Use Total Index",
       y = "COVID CTB") +
  theme_minimal()

p5 <- ggplot(data, aes(x = AltMed_index, y = misinfo_covid_beliefs)) +
  geom_point(alpha = 0.6) + 
  geom_smooth(method = "loess", se = FALSE, color = "blue") +
  labs(x = "Alternative Media Use Index",
       y = "COVID CTB") +
  theme_minimal()

p6 <- ggplot(data, aes(x = EpistBel_IT_index, y = misinfo_covid_beliefs)) +
  geom_point(alpha = 0.6) + 
  geom_smooth(method = "loess", se = FALSE, color = "blue") +
  labs(x = "Epistemic Belief IT Index",
       y = "COVID CTB") +
  theme_minimal()

p7 <- ggplot(data, aes(x = EpistBel_Truth_index, y = misinfo_covid_beliefs)) +
  geom_point(alpha = 0.6) + 
  geom_smooth(method = "loess", se = FALSE, color = "blue") +
  labs(x = "Epistemic Belief Truth Index",
       y = "COVID CTB") +
  theme_minimal()

p8 <- ggplot(data, aes(x = EpistBel_AOT_index, y = misinfo_covid_beliefs)) +
  geom_point(alpha = 0.6) + 
  geom_smooth(method = "loess", se = FALSE, color = "blue") +
  labs(x = "Epistemic Belief AOT Index",
       y = "COVID CTB") +
  theme_minimal()

p9 <- ggplot(data, aes(x = SDO_index, y = misinfo_covid_beliefs)) +
  geom_point(alpha = 0.6) + 
  geom_smooth(method = "loess", se = FALSE, color = "blue") +
  labs(x = "SDO Index",
       y = "COVID CTB") +
  theme_minimal()

p10 <- ggplot(data, aes(x = TrustMedia_index, y = misinfo_covid_beliefs)) +
  geom_point(alpha = 0.6) + 
  geom_smooth(method = "loess", se = FALSE, color = "blue") +
  labs(x = "Trust in Media Index",
       y = "COVID CTB") +
  theme_minimal()

p11 <- ggplot(data, aes(x = TrustScience, y = misinfo_covid_beliefs)) +
  geom_point(alpha = 0.6) + 
  geom_smooth(method = "loess", se = FALSE, color = "blue") +
  labs(x = "Trust in Science",
       y = "COVID CTB") +
  theme_minimal()

p12 <- ggplot(data, aes(x = TrustPolitical_index, y = misinfo_covid_beliefs)) +
  geom_point(alpha = 0.6) + 
  geom_smooth(method = "loess", se = FALSE, color = "blue") +
  labs(x = "Trust in Political Index",
       y = "COVID CTB") +
  theme_minimal()

p13 <- ggplot(data, aes(x = age, y = misinfo_covid_beliefs)) +
  geom_point(alpha = 0.6) + 
  geom_smooth(method = "loess", se = FALSE, color = "blue") +
  labs(x = "Age",
       y = "COVID CTB") +
  theme_minimal()

p14 <- ggplot(data, aes(x = religiousity, y = misinfo_covid_beliefs)) +
  geom_point(alpha = 0.6) + 
  geom_smooth(method = "loess", se = FALSE, color = "blue") +
  labs(x = "Religiousity",
       y = "COVID CTB") +
  theme_minimal()

p15 <- ggplot(data, aes(x = polorientation, y = misinfo_covid_beliefs)) +
  geom_point(alpha = 0.6) + 
  geom_smooth(method = "loess", se = FALSE, color = "blue") +
  labs(x = "Political Orientation",
       y = "COVID CTB") +
  theme_minimal()

p16 <- ggplot(data, aes(x = TrustAltMed_index, y = misinfo_covid_beliefs)) +
  geom_point(alpha = 0.6) + 
  geom_smooth(method = "loess", se = FALSE, color = "blue") +
  labs(x = "Trust Alternative Media",
       y = "COVID CTB") +
  theme_minimal()

# Combine the scatterplots into a grid
grid.arrange(p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13, p14, p15, p16, ncol = 3)

#### Rainbow-Test ----
raintest(model_lin)
### Result: assumption of linearity not violated


### Assumption 2: Homoscedasticity (equality of variances) of the residuals ----
plot(model_lin, which = 1)
ols_test_breusch_pagan(model_lin)
### Result: assumption violated, heteroscedasticity detected
### Assumption 3: Normality of residuals  ----
# QQ-Plot
plot(model_lin, which = 2)
# Histogram of residuals
ols_plot_resid_hist(model_lin)
# Shapiro-Wilk-Test
shapiro.test(model_lin$residuals)
### Result: assumption violated, non-normality detected

### Assumption 4: No multicollinearity  ----
car::vif(model_lin)
### Result: assumption given, none of the VIF values ≥ 10

### Assumption 5: No outliers  ----
# Cook's Distanz
plot(model_lin, 4)

# Identify cases where cooks distance is > 4/N
cooks_distance <- cooks.distance(model_lin)
influential_points <- which(cooks_distance > 4/length(cooks_distance))
influential_points

# Identify cases where cooks distance is > 1/N
cooks_distance <- cooks.distance(model_lin)
influential_points <- which(cooks_distance > 1/length(cooks_distance))
influential_points
### Result: assumption violated, several outliers

## Fit the final linear regression model ----
### with robust standard errors to account for heteroskedasticity ----
coeftest(model_lin, vcov = vcovHC(model_lin, "HC1"))
### with bootstrapping to account for non-normality of residuals ----
set.seed(12345)
model_b <- lm(misinfo_covid_beliefs ~ ConspMent_index +MediaUse_traditional_index + MediaUse_online_index +
                MediaUse_socialMedia_index + AltMed_index +
                EpistBel_IT_index + EpistBel_Truth_index + EpistBel_AOT_index +
                SDO_index +
                TrustMedia_index + TrustAltMed_index + TrustScience + TrustPolitical_index +
                gender_male + age + edu_high + religiousity + polorientation, data = ds_analysis_paper2_Misinfo_Ant)
fit_b <- Boot(model_b, R = 5000)
summary(fit_b)
confint(fit_b, level = .95)
hist(fit_b)

### with iterated re-weighted least squares (IRLS) to account for outliers ----
summary(rr.huber <- rlm(misinfo_covid_beliefs ~ ConspMent_index +
                          MediaUse_traditional_index + MediaUse_online_index +
                          MediaUse_socialMedia_index + AltMed_index +
                          EpistBel_IT_index + EpistBel_Truth_index + EpistBel_AOT_index +
                          SDO_index +
                          TrustMedia_index + TrustAltMed_index + TrustScience + TrustPolitical_index +
                          gender_male + age + edu_high + religiousity + polorientation, 
                        data = ds_analysis_paper2_Misinfo_Ant))

## Compare the models accounting for violations of assumptions with initial linear regression model ----
## Export linear regression model as table and graph using sjplot ----
tab_model(model_lin,show.std=TRUE,p.style = "stars",file = "_tables/2_lm_Misinfo.html")

plot_lm<-sjPlot::plot_model(model_lin, sort.est = TRUE,type = "std",show.values = TRUE,value.offset = .4) 
plot_lm
ggsave("_figures/2_lm_Misinfo.jpg", plot = plot_lm)



# --------------------------------------------------------------------------------
